Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
CudaBuffers.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 07/08/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ////////////////////////////////////////////////////
13 // Device and host buffer for CUDA architectures. //
14 ////////////////////////////////////////////////////
15 
16 #ifndef TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
17 #define TMVA_DNN_ARCHITECTURES_CUDA_CUDABUFFERS
18 
19 #include "cuda.h"
20 #include "cuda_runtime.h"
21 
22 #include <memory>
23 
24 namespace TMVA {
25 namespace DNN {
26 
27 template<typename AFloat>
28 class TCudaDeviceBuffer; // forward declaration: TCudaHostBuffer names this template as a friend before it is defined further down
29 
30 /** TCudaHostBuffer
31  *
32  * Wrapper class for pinned memory buffers on the host. Uses
33  * std::shared_ptr with a custom deleter to ensure consistent
34  * memory management and allow for easy copying/moving of the
35  * buffers. Copying is asynchronous and will set the cudaStream of the
36  * device buffer so that subsequent computations on the device buffer
37  * can be performed on the same stream.
38  *
39  * \tparam AFloat The floating point type to be stored in the buffers.
40  */
41 template<typename AFloat>
42 class TCudaHostBuffer
43 {
44 private:
45 
46  size_t fOffset = 0; ///< Offset for sub-buffers; added to the base pointer on element access
47  size_t fSize = 0; ///< Buffer size as reported by GetSize(); 0 for a default-constructed buffer
48  mutable cudaStream_t fComputeStream = 0; ///< cudaStream for data transfer; 0 is the CUDA default stream
49  std::shared_ptr<AFloat *> fHostPointer; ///< Shared handle to the base pointer of the buffer data
50 
51  // Custom deleter type for fHostPointer. Pinned host memory has to be released with cudaFreeHost (not cudaFree); operator() is not defined in this header.
52  struct TDestructor
53  {
54  TDestructor() = default;
55  TDestructor(const TDestructor &) = default;
56  TDestructor( TDestructor &&) = default;
57  TDestructor & operator=(const TDestructor &) = default;
58  TDestructor & operator=( TDestructor &&) = default;
59  void operator()(AFloat ** hostPointer); // presumably installed as the shared_ptr deleter by the constructors - confirm in the implementation file
60  } fDestructor;
61 
62  friend TCudaDeviceBuffer<AFloat>; // grants the matching device buffer access to the private members above
63 
64 public:
65 
66  TCudaHostBuffer(size_t size); // NOTE(review): not explicit, so a size_t converts implicitly to a buffer
67  TCudaHostBuffer(AFloat *); // NOTE(review): ownership of the raw pointer is not visible from this header - confirm
68  TCudaHostBuffer() = default; // empty buffer: offset 0, size 0, default stream, null data handle
69  TCudaHostBuffer(const TCudaHostBuffer &) = default;
70  TCudaHostBuffer( TCudaHostBuffer &&) = default;
71  TCudaHostBuffer & operator=(const TCudaHostBuffer &) = default;
72  TCudaHostBuffer & operator=( TCudaHostBuffer &&) = default;
73 
74  /** Return sub-buffer of the current buffer. */
75  TCudaHostBuffer GetSubBuffer(size_t offset, size_t size);
76  /** Sets the entire buffer to a constant value */
77  void SetConstVal(const AFloat constVal);
78 
79  operator AFloat * () const; ///< Implicit conversion to a raw AFloat pointer (not defined in this header)
80 
81  inline AFloat & operator[](size_t index); ///< Unchecked element access relative to fOffset; returns a reference
82  inline AFloat operator[](size_t index) const; ///< Unchecked element access relative to fOffset; returns a copy
83 
84  size_t GetSize() const {return fSize;} ///< Return the stored buffer size (fSize)
85 
86 };
87 
88 /** TCudaDeviceBuffer
89  *
90  * Service class for on-device memory buffers. Uses
91  * std::shared_ptr with a custom deleter to ensure consistent
92  * memory management and allow for easy copying/moving. A device
93  * buffer has an associated CUDA compute stream, which is used for
94  * implicit synchronization of data transfers.
95  *
96  * \tparam AFloat The floating point type to be stored in the buffers.
97  */
98 template<typename AFloat>
99 class TCudaDeviceBuffer
100 {
101 private:
102 
103  size_t fOffset = 0; ///< Offset for sub-buffers
104  size_t fSize = 0; ///< Buffer size as reported by GetSize(); 0 for a default-constructed buffer
105  cudaStream_t fComputeStream = 0; ///< cudaStream for data transfer; 0 is the CUDA default stream
106  std::shared_ptr<AFloat *> fDevicePointer; ///< Shared handle to the base pointer of the buffer data
107 
108  // Custom deleter type for fDevicePointer. Device memory obtained from cudaMalloc has to be released with cudaFree; operator() is not defined in this header.
109  struct TDestructor
110  {
111  TDestructor() = default;
112  TDestructor(const TDestructor &) = default;
113  TDestructor( TDestructor &&) = default;
114  TDestructor & operator=(const TDestructor &) = default;
115  TDestructor & operator=( TDestructor &&) = default;
116  void operator()(AFloat ** devicePointer); // presumably installed as the shared_ptr deleter by the constructors - confirm in the implementation file
117  friend TCudaDeviceBuffer;
118  } fDestructor;
119 
120 public:
121 
122  TCudaDeviceBuffer(size_t size); // NOTE(review): not explicit, so a size_t converts implicitly to a buffer
123  TCudaDeviceBuffer(size_t size, cudaStream_t stream);
124  TCudaDeviceBuffer(AFloat *, size_t size, cudaStream_t stream); // NOTE(review): ownership of the raw pointer is not visible from this header - confirm
125  TCudaDeviceBuffer() = default; // empty buffer: offset 0, size 0, default stream, null data handle
126  TCudaDeviceBuffer(const TCudaDeviceBuffer &) = default;
127  TCudaDeviceBuffer( TCudaDeviceBuffer &&) = default;
128  TCudaDeviceBuffer & operator=(const TCudaDeviceBuffer &) = default;
129  TCudaDeviceBuffer & operator=( TCudaDeviceBuffer &&) = default;
130 
131  /** Return sub-buffer of the current buffer. */
132  TCudaDeviceBuffer GetSubBuffer(size_t offset, size_t size);
133  /** Convert to raw device data pointer.*/
134  operator AFloat * () const;
135 
136  void CopyFrom(const TCudaHostBuffer<AFloat> &) const; ///< Host-to-device transfer, going by the name; definition is outside this header
137  void CopyTo(const TCudaHostBuffer<AFloat> &) const; ///< Device-to-host transfer, going by the name; note the destination is taken by const reference
138 
139  size_t GetSize() const {return fSize;} ///< Return the stored buffer size (fSize)
140  cudaStream_t GetComputeStream() const {return fComputeStream;} ///< Return the stream currently associated with this buffer
141  void SetComputeStream(cudaStream_t stream) {fComputeStream = stream;} ///< Associate this buffer with another stream; only the handle is stored
142 
143  size_t GetUseCount() const { return fDevicePointer.use_count(); } ///< Number of shared_ptr owners of the device pointer handle (0 when empty)
144 
145 };
146 
147 //
148 // Inline Functions.
149 //______________________________________________________________________________
150 
151 template<typename AFloat>
152 AFloat & TCudaHostBuffer<AFloat>::operator[](size_t index) // mutable element access, no bounds check
153 {
154  return *(*fHostPointer + fOffset + index); // base pointer, shifted by the sub-buffer offset, then by index
155 }
156 
157 template<typename AFloat>
158 AFloat TCudaHostBuffer<AFloat>::operator[](size_t index) const // read-only element access, returns a copy, no bounds check
159 {
160  return *(*fHostPointer + fOffset + index); // base pointer, shifted by the sub-buffer offset, then by index
161 }
162 
163 
164 } // namespace DNN
165 } // namespace TMVA
166 #endif