Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
CpuBuffer.cxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 12/08/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /////////////////////////////////////////////////////////////
13 // CPU Buffer interface class for the generic data loader. //
14 /////////////////////////////////////////////////////////////
15 
16 #include <vector>
17 #include <memory>
18 #include "TMVA/DataSetInfo.h"
19 #include "TMVA/DNN/DataLoader.h"
22 #include "Rtypes.h"
23 #include <iostream>
24 
25 namespace TMVA {
26 namespace DNN {
27 
28 //______________________________________________________________________________
29 template <typename AReal>
30 void TCpuBuffer<AReal>::TDestructor::operator()(AReal **pointer)
31 {
32  delete[] * pointer;
33  delete[] pointer;
34 }
35 
36 //______________________________________________________________________________
37 template <typename AReal>
38 TCpuBuffer<AReal>::TCpuBuffer(size_t size) : fSize(size), fOffset(0)
39 {
40  AReal **pointer = new AReal *[1];
41  *pointer = new AReal[size];
42  fBuffer = std::shared_ptr<AReal *>(pointer, fDestructor);
43 }
44 
45 //______________________________________________________________________________
46 template <typename AReal>
47 TCpuBuffer<AReal> TCpuBuffer<AReal>::GetSubBuffer(size_t offset, size_t size) const
48 {
49  TCpuBuffer buffer = *this;
50  buffer.fOffset = offset;
51  buffer.fSize = size;
52  return buffer;
53 }
54 
55 //______________________________________________________________________________
56 template <typename AReal>
57 void TCpuBuffer<AReal>::CopyFrom(const TCpuBuffer &other)
58 {
59  //std::copy*this->fBuffer, *other.fBuffer);
60  std::copy( *other.fBuffer, *other.fBuffer+fSize, *this->fBuffer);
61 }
62 
63 //______________________________________________________________________________
64 template <typename AReal>
65 void TCpuBuffer<AReal>::CopyTo(TCpuBuffer &other) const
66 {
67  std::copy( *this->fBuffer, *this->fBuffer+fSize, *other.fBuffer);
68  //std::swap(*this->fBuffer, *other.fBuffer);
69 }
70 
71 //______________________________________________________________________________
72 template <>
73 void TDataLoader<MatrixInput_t, TCpu<Float_t>>::CopyInput(TCpuBuffer<Float_t> &buffer, IndexIterator_t sampleIterator,
74  size_t batchSize)
75 {
76  const TMatrixT<Float_t> &inputMatrix = std::get<0>(fData);
77  size_t n = inputMatrix.GetNcols();
78 
79  for (size_t i = 0; i < batchSize; i++) {
80  size_t sampleIndex = *sampleIterator;
81  for (size_t j = 0; j < n; j++) {
82  size_t bufferIndex = j * batchSize + i;
83  buffer[bufferIndex] = static_cast<Float_t>(inputMatrix(sampleIndex, j));
84  }
85  sampleIterator++;
86  }
87 }
88 
89 //______________________________________________________________________________
90 template <>
91 void TDataLoader<MatrixInput_t, TCpu<Float_t>>::CopyOutput(TCpuBuffer<Float_t> &buffer, IndexIterator_t sampleIterator,
92  size_t batchSize)
93 {
94  const TMatrixT<Float_t> &outputMatrix = std::get<1>(fData);
95  size_t n = outputMatrix.GetNcols();
96 
97  for (size_t i = 0; i < batchSize; i++) {
98  size_t sampleIndex = *sampleIterator;
99  for (size_t j = 0; j < n; j++) {
100  size_t bufferIndex = j * batchSize + i;
101  buffer[bufferIndex] = static_cast<Float_t>(outputMatrix(sampleIndex, j));
102  }
103  sampleIterator++;
104  }
105 }
106 
107 //______________________________________________________________________________
108 template <>
109 void TDataLoader<MatrixInput_t, TCpu<Float_t>>::CopyWeights(TCpuBuffer<Float_t> &buffer, IndexIterator_t sampleIterator,
110  size_t batchSize)
111 {
112  const TMatrixT<Float_t> &outputMatrix = std::get<2>(fData);
113 
114  for (size_t i = 0; i < batchSize; i++) {
115  size_t sampleIndex = *sampleIterator;
116  buffer[i] = static_cast<Float_t>(outputMatrix(sampleIndex, 0));
117  sampleIterator++;
118  }
119 }
120 
121 //______________________________________________________________________________
122 template <>
123 void TDataLoader<MatrixInput_t, TCpu<Double_t>>::CopyInput(TCpuBuffer<Double_t> &buffer, IndexIterator_t sampleIterator,
124  size_t batchSize)
125 {
126  const TMatrixT<Double_t> &inputMatrix = std::get<0>(fData);
127  size_t n = inputMatrix.GetNcols();
128 
129  for (size_t i = 0; i < batchSize; i++) {
130  size_t sampleIndex = *sampleIterator;
131  for (size_t j = 0; j < n; j++) {
132  size_t bufferIndex = j * batchSize + i;
133  buffer[bufferIndex] = inputMatrix(sampleIndex, j);
134  }
135  sampleIterator++;
136  }
137 }
138 
139 //______________________________________________________________________________
140 template <>
141 void TDataLoader<MatrixInput_t, TCpu<Double_t>>::CopyOutput(TCpuBuffer<Double_t> &buffer,
142  IndexIterator_t sampleIterator, size_t batchSize)
143 {
144  const TMatrixT<Double_t> &outputMatrix = std::get<1>(fData);
145  size_t n = outputMatrix.GetNcols();
146 
147  for (size_t i = 0; i < batchSize; i++) {
148  size_t sampleIndex = *sampleIterator;
149  for (size_t j = 0; j < n; j++) {
150  size_t bufferIndex = j * batchSize + i;
151  buffer[bufferIndex] = outputMatrix(sampleIndex, j);
152  }
153  sampleIterator++;
154  }
155 }
156 
157 //______________________________________________________________________________
158 template <>
159 void TDataLoader<MatrixInput_t, TCpu<Double_t>>::CopyWeights(TCpuBuffer<Double_t> &buffer,
160  IndexIterator_t sampleIterator, size_t batchSize)
161 {
162  const TMatrixT<Double_t> &outputMatrix = std::get<2>(fData);
163 
164  for (size_t i = 0; i < batchSize; i++) {
165  size_t sampleIndex = *sampleIterator;
166  buffer[i] = static_cast<Double_t>(outputMatrix(sampleIndex, 0));
167  sampleIterator++;
168  }
169 }
170 
171 //______________________________________________________________________________
172 template <>
173 void TDataLoader<TMVAInput_t, TCpu<Double_t>>::CopyInput(TCpuBuffer<Double_t> &buffer, IndexIterator_t sampleIterator,
174  size_t batchSize)
175 {
176  Event *event = std::get<0>(fData)[0];
177  size_t n = event->GetNVariables();
178  for (size_t i = 0; i < batchSize; i++) {
179  size_t sampleIndex = * sampleIterator++;
180  event = std::get<0>(fData)[sampleIndex];
181  for (size_t j = 0; j < n; j++) {
182  size_t bufferIndex = j * batchSize + i;
183  buffer[bufferIndex] = event->GetValue(j);
184  }
185  }
186 }
187 
188 //______________________________________________________________________________
189 template <>
190 void TDataLoader<TMVAInput_t, TCpu<Double_t>>::CopyOutput(TCpuBuffer<Double_t> &buffer, IndexIterator_t sampleIterator,
191  size_t batchSize)
192 {
193  const DataSetInfo &info = std::get<1>(fData);
194  size_t n = buffer.GetSize() / batchSize;
195 
196  // Copy target(s).
197 
198  for (size_t i = 0; i < batchSize; i++) {
199  size_t sampleIndex = *sampleIterator++;
200  Event *event = std::get<0>(fData)[sampleIndex];
201  for (size_t j = 0; j < n; j++) {
202  // Copy output matrices.
203  size_t bufferIndex = j * batchSize + i;
204  // Classification
205  if (event->GetNTargets() == 0) {
206  if (n == 1) {
207  // Binary.
208  buffer[bufferIndex] = (info.IsSignal(event)) ? 1.0 : 0.0;
209  } else {
210  // Multiclass.
211  buffer[bufferIndex] = 0.0;
212  if (j == event->GetClass()) {
213  buffer[bufferIndex] = 1.0;
214  }
215  }
216  } else {
217  buffer[bufferIndex] = static_cast<Float_t>(event->GetTarget(j));
218  }
219  }
220  }
221 }
222 
223 //______________________________________________________________________________
224 template <>
225 void TDataLoader<TMVAInput_t, TCpu<Double_t>>::CopyWeights(TCpuBuffer<Double_t> &buffer, IndexIterator_t sampleIterator,
226  size_t batchSize)
227 {
228  for (size_t i = 0; i < batchSize; i++) {
229  size_t sampleIndex = *sampleIterator++;
230  Event *event = std::get<0>(fData)[sampleIndex];
231  buffer[i] = event->GetWeight();
232  }
233 }
234 
235 //______________________________________________________________________________
236 template <>
237 void TDataLoader<TMVAInput_t, TCpu<Float_t>>::CopyInput(TCpuBuffer<Float_t> &buffer, IndexIterator_t sampleIterator,
238  size_t batchSize)
239 {
240  Event *event = std::get<0>(fData)[0];
241  size_t n = event->GetNVariables();
242  for (size_t i = 0; i < batchSize; i++) {
243  size_t sampleIndex = * sampleIterator++;
244  event = std::get<0>(fData)[sampleIndex];
245  for (size_t j = 0; j < n; j++) {
246  size_t bufferIndex = j * batchSize + i;
247  buffer[bufferIndex] = static_cast<Float_t>(event->GetValue(j));
248  }
249  }
250 }
251 
252 //______________________________________________________________________________
253 template <>
254 void TDataLoader<TMVAInput_t, TCpu<Float_t>>::CopyOutput(TCpuBuffer<Float_t> &buffer, IndexIterator_t sampleIterator,
255  size_t batchSize)
256 {
257  const DataSetInfo &info = std::get<1>(fData);
258  size_t n = buffer.GetSize() / batchSize;
259 
260  // Copy target(s).
261 
262  for (size_t i = 0; i < batchSize; i++) {
263  size_t sampleIndex = *sampleIterator++;
264  Event *event = std::get<0>(fData)[sampleIndex];
265  for (size_t j = 0; j < n; j++) {
266  // Copy output matrices.
267  size_t bufferIndex = j * batchSize + i;
268  // Classification
269  if (event->GetNTargets() == 0) {
270  if (n == 1) {
271  // Binary.
272  buffer[bufferIndex] = (info.IsSignal(event)) ? 1.0 : 0.0;
273  } else {
274  // Multiclass.
275  buffer[bufferIndex] = 0.0;
276  if (j == event->GetClass()) {
277  buffer[bufferIndex] = 1.0;
278  }
279  }
280  } else {
281  buffer[bufferIndex] = static_cast<Float_t>(event->GetTarget(j));
282  }
283  }
284  }
285 }
286 
287 //______________________________________________________________________________
288 template <>
289 void TDataLoader<TMVAInput_t, TCpu<Float_t>>::CopyWeights(TCpuBuffer<Float_t> &buffer, IndexIterator_t sampleIterator,
290  size_t batchSize)
291 {
292  for (size_t i = 0; i < batchSize; i++) {
293  size_t sampleIndex = *sampleIterator++;
294  Event *event = std::get<0>(fData)[sampleIndex];
295  buffer[i] = static_cast<Float_t>(event->GetWeight());
296  }
297 }
298 
299 //______________________________________________________________________________
300 template <>
301 void TTensorDataLoader<TensorInput, TCpu<Float_t>>::CopyTensorInput(TCpuBuffer<Float_t> &buffer,
302  IndexIterator_t sampleIterator)
303 {
304  const std::vector<TMatrixT<Double_t>> &inputTensor = std::get<0>(fData);
305 
306  if (fBatchDepth == 1) {
307  for (size_t i = 0; i < fBatchHeight; i++) {
308  size_t sampleIndex = *sampleIterator;
309  for (size_t j = 0; j < fBatchWidth; j++) {
310  size_t bufferIndex = j * fBatchHeight + i;
311  buffer[bufferIndex] = static_cast<Float_t>(inputTensor[0](sampleIndex, j));
312  }
313  sampleIterator++;
314  }
315  } else {
316  for (size_t i = 0; i < fBatchDepth; i++) {
317  size_t sampleIndex = *sampleIterator;
318  for (size_t j = 0; j < fBatchHeight; j++) {
319  for (size_t k = 0; k < fBatchWidth; k++) {
320  size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + j;
321  buffer[bufferIndex] = static_cast<Float_t>(inputTensor[sampleIndex](j, k));
322  }
323  }
324  sampleIterator++;
325  }
326  }
327 }
328 
329 //______________________________________________________________________________
330 template <>
331 void TTensorDataLoader<TensorInput, TCpu<Float_t>>::CopyTensorOutput(TCpuBuffer<Float_t> &buffer,
332  IndexIterator_t sampleIterator)
333 {
334  const TMatrixT<Double_t> &outputMatrix = std::get<1>(fData);
335  size_t n = outputMatrix.GetNcols();
336 
337  for (size_t i = 0; i < fBatchSize; i++) {
338  size_t sampleIndex = *sampleIterator;
339  for (size_t j = 0; j < n; j++) {
340  size_t bufferIndex = j * fBatchSize + i;
341  buffer[bufferIndex] = static_cast<Float_t>(outputMatrix(sampleIndex, j));
342  }
343  sampleIterator++;
344  }
345 }
346 
347 //______________________________________________________________________________
348 template <>
349 void TTensorDataLoader<TensorInput, TCpu<Float_t>>::CopyTensorWeights(TCpuBuffer<Float_t> &buffer,
350  IndexIterator_t sampleIterator)
351 {
352  const TMatrixT<Double_t> &outputMatrix = std::get<2>(fData);
353 
354  for (size_t i = 0; i < fBatchSize; i++) {
355  size_t sampleIndex = *sampleIterator;
356  buffer[i] = static_cast<Float_t>(outputMatrix(sampleIndex, 0));
357  sampleIterator++;
358  }
359 }
360 
#if 0
// Disabled (compiled out): GetTensorBatch specialization for TensorInput / TCpu<Float_t>.
//______________________________________________________________________________
template <>
TTensorBatch<TCpu<Float_t> > TTensorDataLoader<TensorInput, TCpu<Float_t> >::GetTensorBatch()
{
   // Copy the data to the device first, then wrap each device buffer in the
   // architecture's tensor / matrix type.
   DeviceBufferTuple buffers = CopyTensorBatches();

   Tensor_t input( std::get<0>(buffers), { fBatchHeight, fBatchWidth, fBatchSize } );
   // Earlier per-sample variant, kept for reference:
   // size_t jump = fBatchHeight * fBatchWidth;
   // for (size_t i = 0; i < fBatchSize; i++) {
   //    DeviceBuffer_t subInputDeviceBuffer = std::get<0>(DeviceBuffers).GetSubBuffer(i * jump, jump);
   //    inputTensor.emplace_back(subInputDeviceBuffer, fBatchHeight, fBatchWidth);
   // }
   Matrix_t output(std::get<1>(buffers), fBatchSize, fNOutputFeatures);
   Matrix_t weights(std::get<2>(buffers), fBatchSize, fNOutputFeatures);

   fBatchIndex++;
   return TTensorBatch<TCpu<Float_t> >(input, output, weights);
}
#endif
383 
384 //______________________________________________________________________________
385 template <>
386 void TTensorDataLoader<TensorInput, TCpu<Double_t>>::CopyTensorInput(TCpuBuffer<Double_t> &buffer,
387  IndexIterator_t sampleIterator)
388 {
389  const std::vector<TMatrixT<Double_t>> &inputTensor = std::get<0>(fData);
390 
391  if (fBatchDepth == 1) {
392  for (size_t i = 0; i < fBatchHeight; i++) {
393  size_t sampleIndex = *sampleIterator;
394  for (size_t j = 0; j < fBatchWidth; j++) {
395  size_t bufferIndex = j * fBatchHeight + i;
396  buffer[bufferIndex] = inputTensor[0](sampleIndex, j);
397  }
398  sampleIterator++;
399  }
400  } else {
401  for (size_t i = 0; i < fBatchDepth; i++) {
402  size_t sampleIndex = *sampleIterator;
403  for (size_t j = 0; j < fBatchHeight; j++) {
404  for (size_t k = 0; k < fBatchWidth; k++) {
405  size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + j;
406  buffer[bufferIndex] = inputTensor[sampleIndex](j, k);
407  }
408  }
409  sampleIterator++;
410  }
411  }
412 }
413 
414 //______________________________________________________________________________
415 template <>
416 void TTensorDataLoader<TensorInput, TCpu<Double_t>>::CopyTensorOutput(TCpuBuffer<Double_t> &buffer,
417  IndexIterator_t sampleIterator)
418 {
419  const TMatrixT<Double_t> &outputMatrix = std::get<1>(fData);
420  size_t n = outputMatrix.GetNcols();
421 
422  for (size_t i = 0; i < fBatchSize; i++) {
423  size_t sampleIndex = *sampleIterator;
424  for (size_t j = 0; j < n; j++) {
425  size_t bufferIndex = j * fBatchSize + i;
426  buffer[bufferIndex] = outputMatrix(sampleIndex, j);
427  }
428  sampleIterator++;
429  }
430 }
431 
432 //______________________________________________________________________________
433 template <>
434 void TTensorDataLoader<TensorInput, TCpu<Double_t>>::CopyTensorWeights(TCpuBuffer<Double_t> &buffer,
435  IndexIterator_t sampleIterator)
436 {
437  const TMatrixT<Double_t> &outputMatrix = std::get<2>(fData);
438 
439  for (size_t i = 0; i < fBatchSize; i++) {
440  size_t sampleIndex = *sampleIterator;
441  buffer[i] = static_cast<Double_t>(outputMatrix(sampleIndex, 0));
442  sampleIterator++;
443  }
444 }
#if 0
// Disabled (compiled out): GetTensorBatch specialization for TensorInput / TCpu<Double_t>.
//______________________________________________________________________________
template <>
TTensorBatch<TCpu<Double_t> > TTensorDataLoader<TensorInput, TCpu<Double_t> >::GetTensorBatch()
{
   // Copy the data to the device first, then wrap each device buffer in the
   // architecture's tensor / matrix type.
   DeviceBufferTuple buffers = CopyTensorBatches();

   Tensor_t input( std::get<0>(buffers), { fBatchHeight, fBatchWidth, fBatchSize } );
   // Earlier per-sample variant, kept for reference:
   // size_t jump = fBatchHeight * fBatchWidth;
   // for (size_t i = 0; i < fBatchSize; i++) {
   //    DeviceBuffer_t subInputDeviceBuffer = std::get<0>(DeviceBuffers).GetSubBuffer(i * jump, jump);
   //    inputTensor.emplace_back(subInputDeviceBuffer, fBatchHeight, fBatchWidth);
   // }

   Matrix_t output(std::get<1>(buffers), fBatchSize, fNOutputFeatures);
   Matrix_t weights(std::get<2>(buffers), fBatchSize, fNOutputFeatures);

   fBatchIndex++;
   return TTensorBatch<TCpu<Double_t> >(input, output, weights);
}
#endif
468 
469 
470 ///- re-implement specialization for Double_t
471 //______________________________________________________________________________
472 template <>
473 void TTensorDataLoader<TMVAInput_t, TCpu<Double_t>>::CopyTensorInput(TCpuBuffer<Double_t> &buffer,
474  IndexIterator_t sampleIterator)
475 {
476  // one event, one example in the batch
477 
478  if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
479  for (size_t i = 0; i < fBatchHeight; i++) {
480  size_t sampleIndex = *sampleIterator;
481  Event * event = std::get<0>(fData)[sampleIndex];
482  for (size_t j = 0; j < fBatchWidth; j++) {
483  size_t bufferIndex = j * fBatchHeight + i;
484  buffer[bufferIndex] = event->GetValue(j);
485  }
486  sampleIterator++;
487  }
488  } else if (fBatchDepth == fBatchSize) {
489  // batchDepth is batch size
490  for (size_t i = 0; i < fBatchDepth; i++) {
491  size_t sampleIndex = *sampleIterator;
492  Event * event = std::get<0>(fData)[sampleIndex];
493  for (size_t j = 0; j < fBatchHeight; j++) {
494  for (size_t k = 0; k < fBatchWidth; k++) {
495  // because of the ordering of tensor in memory is NHWC
496  size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + j;
497  buffer[bufferIndex] = event->GetValue(j * fBatchWidth + k);
498  }
499  }
500  sampleIterator++;
501  }
502  }
503  else {
504  Error("TTensorDataLoader","Inconsistency between batch depth and batch size");
505  R__ASSERT(0); // one event, one example in the batch
506  }
507 }
508 
509 //______________________________________________________________________________
510 template <>
511 void TTensorDataLoader<TMVAInput_t, TCpu<Double_t>>::CopyTensorOutput(TCpuBuffer<Double_t> &buffer,
512  IndexIterator_t sampleIterator)
513 {
514  const DataSetInfo &info = std::get<1>(fData);
515  size_t n = buffer.GetSize() / fBatchSize;
516 
517  // Copy target(s).
518 
519  for (size_t i = 0; i < fBatchSize; i++) {
520  size_t sampleIndex = *sampleIterator++;
521  Event *event = std::get<0>(fData)[sampleIndex];
522  for (size_t j = 0; j < n; j++) {
523  // Copy output matrices.
524  size_t bufferIndex = j * fBatchSize + i;
525  // Classification
526  if (event->GetNTargets() == 0) {
527  if (n == 1) {
528  // Binary.
529  buffer[bufferIndex] = (info.IsSignal(event)) ? 1.0 : 0.0;
530  } else {
531  // Multiclass.
532  buffer[bufferIndex] = 0.0;
533  if (j == event->GetClass()) {
534  buffer[bufferIndex] = 1.0;
535  }
536  }
537  } else {
538  buffer[bufferIndex] = static_cast<Float_t>(event->GetTarget(j));
539  }
540  }
541  }
542 }
543 
544 //______________________________________________________________________________
545 template <>
546 void TTensorDataLoader<TMVAInput_t, TCpu<Double_t>>::CopyTensorWeights(TCpuBuffer<Double_t> &buffer,
547  IndexIterator_t sampleIterator)
548 {
549  for (size_t i = 0; i < fBatchSize; i++) {
550  size_t sampleIndex = *sampleIterator++;
551  Event *event = std::get<0>(fData)[sampleIndex];
552  buffer[i] = event->GetWeight();
553  }
554 }
555 
#if 0
// Disabled (compiled out): GetTensorBatch specialization for TMVAInput_t / TCpu<Double_t>.
//______________________________________________________________________________
template <>
TTensorBatch<TCpu<Double_t> > TTensorDataLoader<TMVAInput_t, TCpu<Double_t> >::GetTensorBatch()
{
   // Copy the data to the device first, then wrap each device buffer in the
   // architecture's tensor / matrix type.
   DeviceBufferTuple buffers = CopyTensorBatches();

   Tensor_t input( std::get<0>(buffers), { fBatchHeight, fBatchWidth, fBatchSize } );
   // Earlier per-sample variant, kept for reference:
   // size_t jump = fBatchHeight * fBatchWidth;
   // for (size_t i = 0; i < fBatchSize; i++) {
   //    DeviceBuffer_t subInputDeviceBuffer = std::get<0>(DeviceBuffers).GetSubBuffer(i * jump, jump);
   //    inputTensor.emplace_back(subInputDeviceBuffer, fBatchHeight, fBatchWidth);
   // }
   Matrix_t output(std::get<1>(buffers), fBatchSize, fNOutputFeatures);
   Matrix_t weights(std::get<2>(buffers), fBatchSize, fNOutputFeatures);

   fBatchIndex++;
   return TTensorBatch<TCpu<Double_t> >(input, output, weights);
}
#endif
579 
580 ///- re-implement specialization for Float_t
581 //______________________________________________________________________________
582 template <>
583 void TTensorDataLoader<TMVAInput_t, TCpu<Float_t>>::CopyTensorInput(TCpuBuffer<Float_t> &buffer,
584  IndexIterator_t sampleIterator)
585 {
586  // one event, one example in the batch
587 
588  if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
589  for (size_t i = 0; i < fBatchHeight; i++) {
590  size_t sampleIndex = *sampleIterator;
591  Event * event = std::get<0>(fData)[sampleIndex];
592  for (size_t j = 0; j < fBatchWidth; j++) {
593  size_t bufferIndex = j * fBatchHeight + i;
594  buffer[bufferIndex] = event->GetValue(j);
595  }
596  sampleIterator++;
597  }
598  } else if (fBatchDepth == fBatchSize) {
599  // batchDepth is batch size
600  for (size_t i = 0; i < fBatchDepth; i++) {
601  size_t sampleIndex = *sampleIterator;
602  Event * event = std::get<0>(fData)[sampleIndex];
603  for (size_t j = 0; j < fBatchHeight; j++) {
604  for (size_t k = 0; k < fBatchWidth; k++) {
605  // because of the column-major ordering
606  size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + j;
607  buffer[bufferIndex] = event->GetValue(j * fBatchWidth + k);
608  }
609  }
610  sampleIterator++;
611  }
612  }
613  else {
614  Error("TTensorDataLoader","Inconsistency between batch depth and batch size");
615  R__ASSERT(0);
616  }
617 }
618 
619 //______________________________________________________________________________
620 template <>
621 void TTensorDataLoader<TMVAInput_t, TCpu<Float_t>>::CopyTensorOutput(TCpuBuffer<Float_t> &buffer,
622  IndexIterator_t sampleIterator)
623 {
624  const DataSetInfo &info = std::get<1>(fData);
625  size_t n = buffer.GetSize() / fBatchSize;
626 
627  // Copy target(s).
628 
629  for (size_t i = 0; i < fBatchSize; i++) {
630  size_t sampleIndex = *sampleIterator++;
631  Event *event = std::get<0>(fData)[sampleIndex];
632  for (size_t j = 0; j < n; j++) {
633  // Copy output matrices.
634  size_t bufferIndex = j * fBatchSize + i;
635  // Classification
636  if (event->GetNTargets() == 0) {
637  if (n == 1) {
638  // Binary.
639  buffer[bufferIndex] = (info.IsSignal(event)) ? 1.0 : 0.0;
640  } else {
641  // Multiclass.
642  buffer[bufferIndex] = 0.0;
643  if (j == event->GetClass()) {
644  buffer[bufferIndex] = 1.0;
645  }
646  }
647  } else {
648  buffer[bufferIndex] = static_cast<Float_t>(event->GetTarget(j));
649  }
650  }
651  }
652 }
653 
654 //______________________________________________________________________________
655 template <>
656 void TTensorDataLoader<TMVAInput_t, TCpu<Float_t>>::CopyTensorWeights(TCpuBuffer<Float_t> &buffer,
657  IndexIterator_t sampleIterator)
658 {
659  for (size_t i = 0; i < fBatchSize; i++) {
660  size_t sampleIndex = *sampleIterator++;
661  Event *event = std::get<0>(fData)[sampleIndex];
662  buffer[i] = event->GetWeight();
663  }
664 }
665 
#if 0
// Disabled (compiled out): GetTensorBatch specialization for TMVAInput_t / TCpu<Float_t>.
//______________________________________________________________________________
template <>
TTensorBatch<TCpu<Float_t> > TTensorDataLoader<TMVAInput_t, TCpu<Float_t> >::GetTensorBatch()
{
   // Copy the data to the device first, then wrap each device buffer in the
   // architecture's tensor / matrix type.
   DeviceBufferTuple buffers = CopyTensorBatches();

   Tensor_t input( std::get<0>(buffers), { fBatchHeight, fBatchWidth, fBatchSize } );
   // Earlier per-sample variant, kept for reference:
   // std::vector<Matrix_t> inputTensor;
   // size_t jump = fBatchHeight * fBatchWidth;
   // for (size_t i = 0; i < fBatchSize; i++) {
   //    DeviceBuffer_t subInputDeviceBuffer = std::get<0>(DeviceBuffers).GetSubBuffer(i * jump, jump);
   //    inputTensor.emplace_back(subInputDeviceBuffer, fBatchHeight, fBatchWidth);
   // }
   Matrix_t output(std::get<1>(buffers), fBatchSize, fNOutputFeatures);
   Matrix_t weights(std::get<2>(buffers), fBatchSize, fNOutputFeatures);

   fBatchIndex++;
   return TTensorBatch<TCpu<Float_t> >(input, output, weights);
}
#endif
689 
690 //______________________________________________________________________________
691 // Explicit instantiations.
692 template class TCpuBuffer<Double_t>;
693 template class TCpuBuffer<Float_t>;
694 
695 template class TTensorDataLoader<TensorInput, TCpu<Float_t>>;
696 template class TTensorDataLoader<TMVAInput_t, TCpu<Float_t>>;
697 template class TTensorDataLoader<TensorInput, TCpu<Double_t>>;
698 template class TTensorDataLoader<TMVAInput_t, TCpu<Double_t>>;
699 
700 } // namespace DNN
701 } // namespace TMVA