// ROOT 6.30.04 Reference Guide — documentation listing of TensorDataLoader.cxx
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Lorenzo Moneta,
3 
4 
5 ////////////////////////////////////////////////////////////////////////
6 // Implementation of TensorDataLoader functions for CUDA with CuDNN architecture. //
7 ////////////////////////////////////////////////////////////////////////
8 
9 #include "TMVA/DataSetInfo.h"
10 
13 
15 
16 
17 
18 #include "cuda_runtime.h"
19 #include <algorithm>
20 
21 namespace TMVA {
22 namespace DNN {
23 
24 //______________________________________________________________________________
25 //
26 // cuDNN
27 //______________________________________________________________________________
28 template <>
29 void TTensorDataLoader<TensorInput, TCudnn<float> >::CopyTensorInput(TCudaHostBuffer<float> &buffer,
30  IndexIterator_t sampleIterator)
31 {
32  const std::vector<TMatrixT<Double_t> > &inputTensor = std::get<0>(fData);
33 
34  if (fBatchDepth == 1) {
35  for (size_t i = 0; i < fBatchHeight; i++) {
36  size_t sampleIndex = *sampleIterator;
37  for (size_t j = 0; j < fBatchWidth; j++) {
38  size_t bufferIndex = j * fBatchHeight + i;
39  buffer[bufferIndex] = static_cast<float>(inputTensor[0](sampleIndex, j));
40  }
41  sampleIterator++;
42  }
43  } else {
44  for (size_t i = 0; i < fBatchDepth; i++) {
45  size_t sampleIndex = *sampleIterator;
46  for (size_t j = 0; j < fBatchHeight; j++) {
47  for (size_t k = 0; k < fBatchWidth; k++) {
48  size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + j;
49  buffer[bufferIndex] = static_cast<float>(inputTensor[sampleIndex](j, k));
50  }
51  }
52  sampleIterator++;
53  }
54  }
55 }
56 
57 //______________________________________________________________________________
58 template <>
59 void TTensorDataLoader<TensorInput, TCudnn<float> >::CopyTensorOutput(TCudaHostBuffer<float> &buffer,
60  IndexIterator_t sampleIterator)
61 {
62  const TMatrixT<Double_t> &outputMatrix = std::get<1>(fData);
63  size_t n = outputMatrix.GetNcols();
64 
65  for (size_t i = 0; i < fBatchSize; i++) {
66  size_t sampleIndex = *sampleIterator;
67  for (size_t j = 0; j < n; j++) {
68  size_t bufferIndex = j * fBatchSize + i;
69  buffer[bufferIndex] = static_cast<float>(outputMatrix(sampleIndex, j));
70  }
71  sampleIterator++;
72  }
73 }
74 
75 //______________________________________________________________________________
76 template <>
77 void TTensorDataLoader<TensorInput, TCudnn<float> >::CopyTensorWeights(TCudaHostBuffer<float> &buffer,
78  IndexIterator_t sampleIterator)
79 {
80  const TMatrixT<Double_t> &weightMatrix = std::get<2>(fData);
81 
82  for (size_t i = 0; i < fBatchSize; i++) {
83  buffer[i] = static_cast<float>(weightMatrix(*sampleIterator, 0));
84  sampleIterator++;
85  }
86 }
87 
88 //______________________________________________________________________________
89 template <>
90 void TTensorDataLoader<TMVAInput_t, TCudnn<float> >::CopyTensorInput(TCudaHostBuffer<float> &buffer,
91  IndexIterator_t sampleIterator)
92 {
93  // Image has channel depth 1 -> they are ordered as row-vectors in a matrix (batchHeight = batchSize)
94  // one event, one example in the batch
95  if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
96  for (size_t i = 0; i < fBatchHeight; i++) {
97  size_t sampleIndex = *sampleIterator;
98  Event * event = std::get<0>(fData)[sampleIndex];
99  for (size_t j = 0; j < fBatchWidth; j++) {
100  size_t bufferIndex = j * fBatchHeight + i;
101  buffer[bufferIndex] = event->GetValue(j);
102  }
103  sampleIterator++;
104  }
105  // A batch is made up by a single image with its channels
106  } else if (fBatchDepth == fBatchSize) {
107  for (size_t i = 0; i < fBatchSize; i++) {
108  size_t sampleIndex = *sampleIterator;
109  Event * event = std::get<0>(fData)[sampleIndex];
110  for (size_t j = 0; j < fBatchHeight; j++) {
111  for (size_t k = 0; k < fBatchWidth; k++) {
112  // Cudnn order is NCHW
113  size_t bufferIndex = i * fBatchHeight * fBatchWidth + j * fBatchWidth + k;
114  buffer[bufferIndex] = event->GetValue(j * fBatchWidth + k);
115  }
116  }
117  sampleIterator++;
118  }
119  }
120  else {
121  std::cout << fBatchDepth << fBatchSize << fBatchHeight << std::endl;
122  Error("TTensorDataLoader","Inconsistency between batch depth and batch size");
123  R__ASSERT(0);
124  }
125 }
126 //______________________________________________________________________________
127 template <>
128 void TTensorDataLoader<TMVAInput_t, TCudnn<float> >::CopyTensorOutput(TCudaHostBuffer<float> &buffer,
129  IndexIterator_t sampleIterator)
130 {
131  const DataSetInfo &info = std::get<1>(fData);
132  size_t n = buffer.GetSize() / fBatchSize;
133 
134  // Copy target(s).
135  for (size_t i = 0; i < fBatchSize; i++) {
136  size_t sampleIndex = *sampleIterator++;
137  Event *event = std::get<0>(fData)[sampleIndex];
138  for (size_t j = 0; j < n; j++) {
139  // Copy output matrices.
140  size_t bufferIndex = j * fBatchSize + i;
141  // Classification
142  if (event->GetNTargets() == 0) {
143  if (n == 1) {
144  // Binary.
145  buffer[bufferIndex] = (info.IsSignal(event)) ? 1.0 : 0.0;
146  } else {
147  // Multiclass.
148  buffer[bufferIndex] = 0.0;
149  if (j == event->GetClass()) {
150  buffer[bufferIndex] = 1.0;
151  }
152  }
153  } else {
154  buffer[bufferIndex] = static_cast<Float_t>(event->GetTarget(j));
155  }
156  }
157  }
158 }
159 
160 //______________________________________________________________________________
161 template <>
162 void TTensorDataLoader<TMVAInput_t, TCudnn<float> >::CopyTensorWeights(TCudaHostBuffer<float> &buffer,
163  IndexIterator_t sampleIterator)
164 {
165  for (size_t i = 0; i < fBatchSize; i++) {
166  size_t sampleIndex = *sampleIterator++;
167  Event *event = std::get<0>(fData)[sampleIndex];
168  buffer[i] = event->GetWeight();
169  }
170 }
171 
172 //______________________________________________________________________________
173 template <>
174 void TTensorDataLoader<TensorInput, TCudnn<double> >::CopyTensorInput(TCudaHostBuffer<double> &buffer,
175  IndexIterator_t sampleIterator)
176 {
177  const std::vector<TMatrixT<Double_t> > &inputTensor = std::get<0>(fData);
178 
179  if (fBatchDepth == 1) {
180  for (size_t i = 0; i < fBatchHeight; i++) {
181  size_t sampleIndex = *sampleIterator;
182  for (size_t j = 0; j < fBatchWidth; j++) {
183  size_t bufferIndex = j * fBatchHeight + i;
184  buffer[bufferIndex] = static_cast<double>(inputTensor[0](sampleIndex, j));
185  }
186  sampleIterator++;
187  }
188  } else {
189  for (size_t i = 0; i < fBatchDepth; i++) {
190  size_t sampleIndex = *sampleIterator;
191  for (size_t j = 0; j < fBatchHeight; j++) {
192  for (size_t k = 0; k < fBatchWidth; k++) {
193  size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + j;
194  buffer[bufferIndex] = static_cast<double>(inputTensor[sampleIndex](j, k));
195  }
196  }
197  sampleIterator++;
198  }
199  }
200 }
201 
202 //______________________________________________________________________________
203 template <>
204 void TTensorDataLoader<TensorInput, TCudnn<double> >::CopyTensorOutput(TCudaHostBuffer<double> &buffer,
205  IndexIterator_t sampleIterator)
206 {
207  const TMatrixT<Double_t> &outputMatrix = std::get<1>(fData);
208  size_t n = outputMatrix.GetNcols();
209 
210  for (size_t i = 0; i < fBatchSize; i++) {
211  size_t sampleIndex = *sampleIterator;
212  for (size_t j = 0; j < n; j++) {
213  size_t bufferIndex = j * fBatchSize + i;
214  buffer[bufferIndex] = outputMatrix(sampleIndex, j);
215  }
216  sampleIterator++;
217  }
218 }
219 
220 //______________________________________________________________________________
221 template <>
222 void TTensorDataLoader<TensorInput, TCudnn<double> >::CopyTensorWeights(TCudaHostBuffer<double> &buffer,
223  IndexIterator_t sampleIterator)
224 {
225  const TMatrixT<Double_t> &weightMatrix = std::get<2>(fData);
226  for (size_t i = 0; i < fBatchSize; i++) {
227  buffer[i] = weightMatrix(*sampleIterator, 0);
228  sampleIterator++;
229  }
230 }
231 
232 //______________________________________________________________________________
233 template <>
234 void TTensorDataLoader<TMVAInput_t, TCudnn<double> >::CopyTensorInput(TCudaHostBuffer<double> &buffer,
235  IndexIterator_t sampleIterator)
236 {
237  // one event, one example in the batch
238  if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
239  for (size_t i = 0; i < fBatchHeight; i++) {
240  size_t sampleIndex = *sampleIterator;
241  Event * event = std::get<0>(fData)[sampleIndex];
242  for (size_t j = 0; j < fBatchWidth; j++) {
243  size_t bufferIndex = j * fBatchHeight + i;
244  buffer[bufferIndex] = event->GetValue(j);
245  }
246  sampleIterator++;
247  }
248  } else if (fBatchDepth == fBatchSize) {
249  // batchDepth is batch size
250  for (size_t i = 0; i < fBatchDepth; i++) {
251  size_t sampleIndex = *sampleIterator;
252  Event * event = std::get<0>(fData)[sampleIndex];
253  for (size_t j = 0; j < fBatchHeight; j++) {
254  for (size_t k = 0; k < fBatchWidth; k++) {
255  // because of the column-major ordering
256  size_t bufferIndex = i * fBatchHeight * fBatchWidth + j * fBatchWidth + k;
257  buffer[bufferIndex] = event->GetValue(j * fBatchWidth + k);
258  }
259  }
260  sampleIterator++;
261  }
262  }
263  else {
264  Error("TTensorDataLoader","Inconsistency between batch depth and batch size");
265  R__ASSERT(0);
266  }
267 }
268 
269 //______________________________________________________________________________
270 template <>
271 void TTensorDataLoader<TMVAInput_t, TCudnn<double> >::CopyTensorOutput(TCudaHostBuffer<double> &buffer,
272  IndexIterator_t sampleIterator)
273 {
274  const DataSetInfo &info = std::get<1>(fData);
275  size_t n = buffer.GetSize() / fBatchSize;
276 
277  // Copy target(s).
278 
279  for (size_t i = 0; i < fBatchSize; i++) {
280  size_t sampleIndex = *sampleIterator++;
281  Event *event = std::get<0>(fData)[sampleIndex];
282  for (size_t j = 0; j < n; j++) {
283  // Copy output matrices.
284  size_t bufferIndex = j * fBatchSize + i;
285  // Classification
286  if (event->GetNTargets() == 0) {
287  if (n == 1) {
288  // Binary.
289  buffer[bufferIndex] = (info.IsSignal(event)) ? 1.0 : 0.0;
290  } else {
291  // Multiclass.
292  buffer[bufferIndex] = 0.0;
293  if (j == event->GetClass()) {
294  buffer[bufferIndex] = 1.0;
295  }
296  }
297  } else {
298  buffer[bufferIndex] = static_cast<Double_t>(event->GetTarget(j));
299  }
300  }
301  }
302 }
303 
304 //______________________________________________________________________________
305 template <>
306 void TTensorDataLoader<TMVAInput_t, TCudnn<double> >::CopyTensorWeights(TCudaHostBuffer<double> &buffer,
307  IndexIterator_t sampleIterator)
308 {
309  for (size_t i = 0; i < fBatchSize; i++) {
310  size_t sampleIndex = *sampleIterator++;
311  Event *event = std::get<0>(fData)[sampleIndex];
312  buffer[i] = event->GetWeight();
313  }
314 }
315 
#if 0
// NOTE(review): this whole section is compiled out. The four GetTensorBatch
// specializations below build the device-side batch tensors from the buffers
// returned by CopyTensorBatches(); they are kept here for reference only.
//______________________________________________________________________________
template <>
TTensorBatch<TCudnn<float> > TTensorDataLoader<TensorInput, TCudnn<float> >::GetTensorBatch()
{
 // Get buffer tuple on device that contains the data
 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();

 std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
 std::vector<size_t> wheightShape {fBatchSize, 1, 1, 1};
 std::vector<TCudaTensor<float> > inputTensor(1, TCudaTensor<float>(std::get<0>(DeviceBuffers),
 this->GetTensorDim(), fInputShape));
 TCudaTensor<float> outputMatrix(std::get<1>(DeviceBuffers), this->GetTensorDim(), outputShape);
 TCudaTensor<float> weightMatrix(std::get<2>(DeviceBuffers), this->GetTensorDim(), wheightShape);

 fBatchIndex++;
 return TTensorBatch<TCudnn<float> >(inputTensor, outputMatrix, weightMatrix);
}

//______________________________________________________________________________
template <>
TTensorBatch<TCudnn<double> > TTensorDataLoader<TensorInput, TCudnn<double> >::GetTensorBatch()
{
 // Get buffer tuple on device that contains the data
 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();

 std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
 std::vector<size_t> wheightShape {fBatchSize, 1, 1, 1};
 std::vector<TCudaTensor<double> > inputTensor(1, TCudaTensor<double>(std::get<0>(DeviceBuffers),
 this->GetTensorDim(), fInputShape));
 TCudaTensor<double> outputMatrix(std::get<1>(DeviceBuffers), this->GetTensorDim(), outputShape);
 TCudaTensor<double> weightMatrix(std::get<2>(DeviceBuffers), this->GetTensorDim(), wheightShape);

 fBatchIndex++;
 return TTensorBatch<TCudnn<double> >(inputTensor, outputMatrix, weightMatrix);
}

//______________________________________________________________________________
template <>
TTensorBatch<TCudnn<float> > TTensorDataLoader<TMVAInput_t, TCudnn<float> >::GetTensorBatch()
{
 // Get buffer tuple on device that contains the data
 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();

 std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
 std::vector<size_t> wheightShape {fBatchSize, 1, 1, 1};
 std::vector<TCudaTensor<float> > inputTensor(1, TCudaTensor<float>(std::get<0>(DeviceBuffers),
 this->GetTensorDim(), fInputShape));
 TCudaTensor<float> outputMatrix(std::get<1>(DeviceBuffers), this->GetTensorDim(), outputShape);
 TCudaTensor<float> weightMatrix(std::get<2>(DeviceBuffers), this->GetTensorDim(), wheightShape);

 fBatchIndex++;
 return TTensorBatch<TCudnn<float> >(inputTensor, outputMatrix, weightMatrix);
}

//______________________________________________________________________________
template <>
TTensorBatch<TCudnn<double> > TTensorDataLoader<TMVAInput_t, TCudnn<double> >::GetTensorBatch()
{
 // Get buffer tuple on device that contains the data
 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();

 std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
 std::vector<size_t> wheightShape {fBatchSize, 1, 1, 1};
 std::vector<TCudaTensor<double> > inputTensor(1, TCudaTensor<double>(std::get<0>(DeviceBuffers),
 this->GetTensorDim(), fInputShape));
 // NOTE(review): unlike the three specializations above, this one passes
 // `fNOutputFeatures + 2` and `3` as the tensor-dimension argument instead of
 // this->GetTensorDim() — looks like a leftover inconsistency; confirm before
 // ever re-enabling this block.
 TCudaTensor<double> outputMatrix(std::get<1>(DeviceBuffers), fNOutputFeatures + 2, outputShape);
 TCudaTensor<double> weightMatrix(std::get<2>(DeviceBuffers), 3, wheightShape);

 fBatchIndex++;
 return TTensorBatch<TCudnn<double> >(inputTensor, outputMatrix, weightMatrix);
}
#endif
389 
390 
391 //______________________________________________________________________________
392 // Explicit Instantiations.
393 
// Instantiate the tensor data loader for both input kinds and both precisions.
template class TTensorDataLoader<TensorInput, TCudnn<float> >;
template class TTensorDataLoader<TMVAInput_t, TCudnn<float> >;
template class TTensorDataLoader<TensorInput, TCudnn<double> >;
template class TTensorDataLoader<TMVAInput_t, TCudnn<double> >;
398 
399 } // TMVA
400 } // DNN