16 #ifndef TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
17 #define TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
20 #include "cuda_runtime.h"
// Forward declaration of the device-side buffer template, so that
// TCudaDeviceBuffer<AFloat> can be named (it appears in a friend
// declaration further down) before the class itself is declared.
27 template<
typename AFloat>
28 class TCudaDeviceBuffer;
// Host-side buffer interface, templated on the element type AFloat.
// NOTE(review): only a subset of the declaration is visible in this excerpt;
// the class header line, access specifiers and some data members (e.g. the
// fSize returned by GetSize()) are not shown here.
41 template<
typename AFloat>
// CUDA stream handle associated with this buffer. Declared `mutable`, so it
// may be modified through a const buffer object.
// NOTE(review): presumably the stream used for data transfers -- confirm
// against the implementation file.
48 mutable cudaStream_t fComputeStream;
// Shared handle wrapping a raw AFloat pointer; copies of the shared_ptr share
// ownership of that wrapped pointer.
49 std::shared_ptr<AFloat *> fHostPointer;
// TDestructor: functor type with all special member functions defaulted.
// NOTE(review): presumably installed as the custom deleter of fHostPointer --
// confirm in the constructor definitions, which are not visible here.
54 TDestructor() =
default;
55 TDestructor(
const TDestructor &) =
default;
56 TDestructor( TDestructor &&) =
default;
57 TDestructor & operator=(
const TDestructor &) =
default;
58 TDestructor & operator=( TDestructor &&) =
default;
// Call operator taking a pointer to the wrapped AFloat pointer; defined
// elsewhere. The parameter is named `devicePointer` even though this
// declaration belongs to the host buffer.
59 void operator()(AFloat ** devicePointer);
// Grants TCudaDeviceBuffer<AFloat> access to this class's non-public members.
62 friend TCudaDeviceBuffer<AFloat>;
// Construct a buffer from a size (defined elsewhere).
// NOTE(review): `size` is presumably a count of AFloat elements rather than
// bytes -- confirm. Not marked `explicit`, so size_t converts implicitly.
66 TCudaHostBuffer(
size_t size);
// Construct from an existing raw pointer (defined elsewhere). Ownership
// semantics are not visible here. Not marked `explicit`, so an AFloat *
// converts implicitly to a TCudaHostBuffer.
67 TCudaHostBuffer(AFloat *);
// Defaulted default/copy/move operations. Because the storage handle is a
// shared_ptr, a copied buffer shares the same wrapped pointer as its source.
68 TCudaHostBuffer() =
default;
69 TCudaHostBuffer(
const TCudaHostBuffer &) =
default;
70 TCudaHostBuffer( TCudaHostBuffer &&) =
default;
71 TCudaHostBuffer & operator=(
const TCudaHostBuffer &) =
default;
72 TCudaHostBuffer & operator=( TCudaHostBuffer &&) =
default;
// Returns a TCudaHostBuffer described by an offset and a size (defined
// elsewhere).
// NOTE(review): presumably a view sharing this buffer's storage -- confirm.
75 TCudaHostBuffer GetSubBuffer(
size_t offset,
size_t size);
// Defined elsewhere.
// NOTE(review): presumably fills the buffer with `constVal` -- confirm.
77 void SetConstVal(
const AFloat constVal);
// Implicit conversion to a raw AFloat pointer (defined elsewhere).
79 operator AFloat * ()
const;
// Element access: the non-const overload returns a reference, the const
// overload returns the element by value.
81 inline AFloat & operator[](
size_t index);
82 inline AFloat operator[](
size_t index)
const;
// Returns the stored size (fSize).
84 size_t GetSize()
const {
return fSize;}
// Device-side buffer interface, templated on the element type AFloat.
// NOTE(review): only a subset of the declaration is visible in this excerpt;
// braces, access specifiers and some data members (e.g. the fSize returned by
// GetSize()) are not shown here.
98 template<
typename AFloat>
99 class TCudaDeviceBuffer
// CUDA stream handle associated with this buffer; read and written through
// GetComputeStream() / SetComputeStream().
105 cudaStream_t fComputeStream;
// Shared handle wrapping a raw AFloat pointer; its use count is what
// GetUseCount() reports.
106 std::shared_ptr<AFloat *> fDevicePointer;
// TDestructor: functor type with all special member functions defaulted.
// NOTE(review): presumably installed as the custom deleter of fDevicePointer
// -- confirm in the constructor definitions, which are not visible here.
111 TDestructor() =
default;
112 TDestructor(
const TDestructor &) =
default;
113 TDestructor( TDestructor &&) =
default;
114 TDestructor & operator=(
const TDestructor &) =
default;
115 TDestructor & operator=( TDestructor &&) =
default;
// Call operator taking a pointer to the wrapped AFloat pointer; defined
// elsewhere.
116 void operator()(AFloat ** devicePointer);
// Friend declaration naming TCudaDeviceBuffer.
117 friend TCudaDeviceBuffer;
// Constructors (all defined elsewhere): from a size; from a size plus the
// stream to associate with the buffer; and from an existing raw pointer plus
// size and stream.
// NOTE(review): `size` is presumably an element count, and ownership of the
// raw pointer passed to the third overload is not visible here -- confirm.
122 TCudaDeviceBuffer(
size_t size);
123 TCudaDeviceBuffer(
size_t size, cudaStream_t stream);
124 TCudaDeviceBuffer(AFloat *,
size_t size, cudaStream_t stream);
// Defaulted default/copy/move operations. Because the storage handle is a
// shared_ptr, a copied buffer shares the same wrapped pointer as its source.
125 TCudaDeviceBuffer() =
default;
126 TCudaDeviceBuffer(
const TCudaDeviceBuffer &) =
default;
127 TCudaDeviceBuffer( TCudaDeviceBuffer &&) =
default;
128 TCudaDeviceBuffer & operator=(
const TCudaDeviceBuffer &) =
default;
129 TCudaDeviceBuffer & operator=( TCudaDeviceBuffer &&) =
default;
// Returns a TCudaDeviceBuffer described by an offset and a size (defined
// elsewhere).
// NOTE(review): presumably a view sharing this buffer's storage -- confirm.
132 TCudaDeviceBuffer GetSubBuffer(
size_t offset,
size_t size);
// Implicit conversion to a raw AFloat pointer (defined elsewhere).
134 operator AFloat * ()
const;
// Transfer operations against a host buffer (defined elsewhere). Both are
// const member functions and both take the host buffer by const reference.
// NOTE(review): CopyTo presumably writes into the host buffer's storage even
// though the argument is const -- confirm against the implementation, and
// confirm whether the copies are asynchronous on fComputeStream.
136 void CopyFrom(
const TCudaHostBuffer<AFloat> &)
const;
137 void CopyTo(
const TCudaHostBuffer<AFloat> &)
const;
// Returns the stored size (fSize).
139 size_t GetSize()
const {
return fSize;}
// Returns the stream handle currently stored in this buffer.
140 cudaStream_t GetComputeStream()
const {
return fComputeStream;}
// Replaces the stored stream handle; only this object's member is assigned.
141 void SetComputeStream(cudaStream_t stream) {fComputeStream = stream;}
// Number of shared_ptr owners of the wrapped pointer. shared_ptr::use_count()
// returns a signed long, which is implicitly converted to size_t here.
143 size_t GetUseCount()
const {
return fDevicePointer.use_count(); }
// Mutable element access. Dereferences fHostPointer to obtain the raw base
// pointer, advances it by fOffset, and returns a reference to the element at
// `index` from there. No bounds check is performed on `index`.
151 template<
typename AFloat>
152 AFloat & TCudaHostBuffer<AFloat>::operator[](
size_t index)
154 return (*fHostPointer + fOffset)[index];
// Read-only element access. Same addressing as the non-const overload (base
// pointer from fHostPointer, advanced by fOffset, then indexed), but the
// element is returned by value. No bounds check is performed on `index`.
157 template<
typename AFloat>
158 AFloat TCudaHostBuffer<AFloat>::operator[](
size_t index)
const
160 return (*fHostPointer + fOffset)[index];