Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
TensorDataLoader.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Vladimir Ilievski
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : TTensorDataLoader *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Tensor Data Loader Class *
12  * *
13  * Authors (alphabetical): *
14  * Vladimir Ilievski <ilievski.vladimir@live.com> - CERN, Switzerland *
15  * *
16  * Copyright (c) 2005-2015: *
17  * CERN, Switzerland *
18  * U. of Victoria, Canada *
19  * MPI-K Heidelberg, Germany *
20  * U. of Bonn, Germany *
21  * *
22  * Redistribution and use in source and binary forms, with or without *
23  * modification, are permitted according to the terms listed in LICENSE *
24  * (http://tmva.sourceforge.net/LICENSE) *
25  **********************************************************************************/
26 
27 #ifndef TMVA_DNN_TENSORDATALOADER
28 #define TMVA_DNN_TENSORDATALOADER
29 
#include "TMatrix.h"
#include "TMVA/Event.h"

#include <algorithm>
#include <cassert>
33 
34 namespace TMVA {
35  class DataSetInfo;
36 namespace DNN {
37 
//
// Input Data Types
//______________________________________________________________________________

/// Non-TMVA input: (vector of input matrices, output/target matrix, weight matrix).
/// All three are held by reference; the caller owns the underlying data.
using TensorInput =
   std::tuple<const std::vector<TMatrixT<Double_t>> &, const TMatrixT<Double_t> &, const TMatrixT<Double_t> &>;

/// TMVA input: the event list together with its DataSetInfo (both owned by the caller).
using TMVAInput_t = std::tuple<const std::vector<Event *> &, const DataSetInfo &>;
/// Iterator over the (possibly shuffled) sample-index vector of a data loader.
using IndexIterator_t = typename std::vector<size_t>::iterator;
46 
47 /** TTensorBatch
48  *
49  * Class representing training batches consisting of a vector of matrices as input data
50  * and a matrix of output data. The input and output data can be accessed using
51  * the GetInput() and GetOutput() member functions.
52  *
53  * \tparam Architecture_t The underlying architecture.
54  */
55 
template <typename Architecture_t>
class TTensorBatch {
public:
   using Matrix_t = typename Architecture_t::Matrix_t; ///< Architecture-specific matrix type.
   using Tensor_t = typename Architecture_t::Tensor_t; ///< Architecture-specific tensor type.

private:
   Tensor_t fInputTensor;  ///< The input tensor batch, one matrix one input.
   Matrix_t fOutputMatrix; ///< The output matrix representing the ground truth.
   Matrix_t fWeightMatrix; ///< The event/example weights

public:
   /// Construct a batch from the input tensor, the ground-truth matrix and the
   /// event-weight matrix (definition out of line, below in this file).
   TTensorBatch(Tensor_t &, Matrix_t &, Matrix_t &);
   TTensorBatch(const TTensorBatch &) = default;
   TTensorBatch(TTensorBatch &&) = default;
   TTensorBatch &operator=(const TTensorBatch &) = default;
   TTensorBatch &operator=(TTensorBatch &&) = default;

   /** Return the tensor representing the input data */
   Tensor_t &GetInput() { return fInputTensor; }
   /** Return the matrix representing the output data. */
   Matrix_t &GetOutput() { return fOutputMatrix; }
   /** Return the matrix holding the event weights. */
   Matrix_t &GetWeights() { return fWeightMatrix; }
};
81 
// Forward declaration: TTensorBatchIterator below refers to the loader type.
template <typename Data_t, typename Architecture_t>
class TTensorDataLoader;
84 
85 /** TTensorBatchIterator
86  *
87  * Simple iterator class for the iterations over the training batches in
88  * a given data set represented by a TTensorDataLoader object.
89  *
90  * \tparam Data_t The input data type.
91  * \tparam Architecture_t The underlying architecture type.
92  */
93 template <typename Data_t, typename Architecture_t>
94 class TTensorBatchIterator {
95 private:
96  TTensorDataLoader<Data_t, Architecture_t> &fTensorDataLoader;
97  size_t fBatchIndex;
98 
99 public:
100  TTensorBatchIterator(TTensorDataLoader<Data_t, Architecture_t> &tensorDataLoader, size_t index = 0)
101  : fTensorDataLoader(tensorDataLoader), fBatchIndex(index)
102  {
103  // Nothing to do here.
104  }
105 
106  TTensorBatch<Architecture_t> operator*() { return fTensorDataLoader.GetTensorBatch(); }
107  TTensorBatchIterator operator++()
108  {
109  fBatchIndex++;
110  return *this;
111  }
112  bool operator!=(const TTensorBatchIterator &other) { return fBatchIndex != other.fBatchIndex; }
113 };
114 
115 /** TTensorDataLoader
116  *
117  * Service class managing the streaming of the training data from the input data
118  * type to the accelerator device or the CPU. A TTensorDataLoader object manages
119  * a number of host and device buffer pairs that are used in a round-robin manner
120  * for the transfer of batches to the device.
121  *
122  * Each TTensorDataLoader object has an associated batch size and a number of total
123  * samples in the dataset. One epoch is the number of buffers required to transfer
124  * the complete training set. Using the begin() and end() member functions allows
125  * the user to iterate over the batches in one epoch.
126  *
127  * \tparam Data_t The input data type.
 * \tparam Architecture_t The architecture class of the underlying architecture.
129  */
template <typename Data_t, typename Architecture_t>
class TTensorDataLoader {
private:
   using HostBuffer_t = typename Architecture_t::HostBuffer_t;
   using DeviceBuffer_t = typename Architecture_t::DeviceBuffer_t;
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;
   using Shape_t = typename Architecture_t::Tensor_t::Shape_t;
   using BatchIterator_t = TTensorBatchIterator<Data_t, Architecture_t>;

   const Data_t &fData;     ///< The data that should be loaded in the batches.
   size_t fNSamples;        ///< The total number of samples in the dataset.
   size_t fBatchSize;       ///< The size of a batch.
   Shape_t fInputLayout;    ///< The input data layout (does not include batch size).
   size_t fBatchDepth;      ///< The number of matrices in the tensor.
   size_t fBatchHeight;     ///< The number of rows in each matrix.
   size_t fBatchWidth;      ///< The number of columns in each matrix.
   size_t fNOutputFeatures; ///< The number of outputs from the classifier/regressor.
   size_t fBatchIndex;      ///< The index of the batch when there are multiple batches in parallel

   size_t fNStreams;                           ///< Number of buffer pairs.
   std::vector<DeviceBuffer_t> fDeviceBuffers; ///< The device buffers used to keep the input, output and weight data.
   std::vector<HostBuffer_t> fHostBuffers;     ///< The host buffers used to load the input, output and weight data.

   std::vector<size_t> fSampleIndices; ///< Ordering of the samples in the epoch.

public:
   /*! Constructor. \p inputLayout describes one sample and must have exactly
    *  three dimensions (batch size excluded); \p batchLayout supplies the
    *  {depth, height, width} of one transferred batch tensor. */
   TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize, const Shape_t & inputLayout,
                     const Shape_t & batchLayout, size_t nOutputFeatures, size_t nStreams = 1);

   TTensorDataLoader(const TTensorDataLoader &) = default;
   TTensorDataLoader(TTensorDataLoader &&) = default;
   TTensorDataLoader &operator=(const TTensorDataLoader &) = default;
   TTensorDataLoader &operator=(TTensorDataLoader &&) = default;

   /** Copy input tensor into the given host buffer. Function to be specialized by
    *  the architecture-specific backend. */
   void CopyTensorInput(HostBuffer_t &buffer, IndexIterator_t begin);
   /** Copy output matrix into the given host buffer. Function to be specialized
    *  by the architecture-specific backend. */
   void CopyTensorOutput(HostBuffer_t &buffer, IndexIterator_t begin);
   /** Copy weight matrix into the given host buffer. Function to be specialized
    *  by the architecture-specific backend. */
   void CopyTensorWeights(HostBuffer_t &buffer, IndexIterator_t begin);

   /** Iterator pointing to the first batch of the epoch. */
   BatchIterator_t begin() { return TTensorBatchIterator<Data_t, Architecture_t>(*this); }
   /** Past-the-end iterator; a trailing incomplete batch (when fNSamples is not
    *  a multiple of fBatchSize) is not iterated over. */
   BatchIterator_t end() { return TTensorBatchIterator<Data_t, Architecture_t>(*this, fNSamples / fBatchSize); }

   /** Shuffle the order of the samples in the epoch. The shuffling is indirect,
    *  i.e. only the indices are shuffled. No input data is moved by this
    *  routine. */
   template<typename RNG>
   void Shuffle(RNG & rng);

   /** Return the next batch from the training set. The TTensorDataLoader object
    *  keeps an internal counter that cycles over the batches in the training
    *  set. */
   TTensorBatch<Architecture_t> GetTensorBatch();
};
191 
192 //
193 // TTensorBatch Class.
194 //______________________________________________________________________________
195 template <typename Architecture_t>
196 TTensorBatch<Architecture_t>::TTensorBatch(Tensor_t &inputTensor, Matrix_t &outputMatrix,
197  Matrix_t &weightMatrix)
198  : fInputTensor(inputTensor), fOutputMatrix(outputMatrix), fWeightMatrix(weightMatrix)
199 {
200  // Nothing to do here.
201 }
202 
203 //
204 // TTensorDataLoader Class.
205 //______________________________________________________________________________
206 template <typename Data_t, typename Architecture_t>
207 TTensorDataLoader<Data_t, Architecture_t>::TTensorDataLoader(const Data_t &data, size_t nSamples, size_t batchSize,
208  const Shape_t & inputLayout, const Shape_t & batchLayout,
209  size_t nOutputFeatures, size_t nStreams)
210  : fData(data), fNSamples(nSamples), fBatchSize(batchSize), fInputLayout(inputLayout), fBatchDepth(batchLayout[0]), fBatchHeight(batchLayout[1]),
211  fBatchWidth(batchLayout[2]), fNOutputFeatures(nOutputFeatures), fBatchIndex(0), fNStreams(nStreams), fDeviceBuffers(),
212  fHostBuffers(), fSampleIndices()
213 {
214  size_t inputTensorSize = fBatchDepth * fBatchHeight * fBatchWidth;
215  size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
216  size_t weightMatrixSize = fBatchSize;
217 
218  for (size_t i = 0; i < fNStreams; i++) {
219  fHostBuffers.push_back(HostBuffer_t(inputTensorSize + outputMatrixSize + weightMatrixSize));
220  fDeviceBuffers.push_back(DeviceBuffer_t(inputTensorSize + outputMatrixSize + weightMatrixSize));
221  }
222 
223  fSampleIndices.reserve(fNSamples);
224  for (size_t i = 0; i < fNSamples; i++) {
225  fSampleIndices.push_back(i);
226  }
227 }
228 
229 //______________________________________________________________________________
/// Return the next batch of the epoch: stage input, output and weight data in a
/// host buffer, transfer it to the matching device buffer, and wrap the device
/// memory in tensor/matrix views. Advances the internal batch counter.
template <typename Data_t, typename Architecture_t>
TTensorBatch<Architecture_t> TTensorDataLoader<Data_t, Architecture_t>::GetTensorBatch()
{
   fBatchIndex %= (fNSamples / fBatchSize); // Cycle through samples: wrap to 0 after the last complete batch.

   // Offsets/sizes of the three sections inside one stream's flat buffer:
   // [ input tensor | output matrix | weight column ].
   size_t inputTensorSize = fBatchDepth * fBatchHeight * fBatchWidth;
   size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
   size_t weightMatrixSize = fBatchSize;

   // Buffer pairs are used round-robin over successive batches.
   size_t streamIndex = fBatchIndex % fNStreams;
   HostBuffer_t &hostBuffer = fHostBuffers[streamIndex];
   DeviceBuffer_t &deviceBuffer = fDeviceBuffers[streamIndex];

   HostBuffer_t inputHostBuffer = hostBuffer.GetSubBuffer(0, inputTensorSize);
   HostBuffer_t outputHostBuffer = hostBuffer.GetSubBuffer(inputTensorSize, outputMatrixSize);
   HostBuffer_t weightHostBuffer = hostBuffer.GetSubBuffer(inputTensorSize + outputMatrixSize, weightMatrixSize);

   DeviceBuffer_t inputDeviceBuffer = deviceBuffer.GetSubBuffer(0, inputTensorSize);
   DeviceBuffer_t outputDeviceBuffer = deviceBuffer.GetSubBuffer(inputTensorSize, outputMatrixSize);
   DeviceBuffer_t weightDeviceBuffer = deviceBuffer.GetSubBuffer(inputTensorSize + outputMatrixSize, weightMatrixSize);

   // Here the sample index advances in steps of the batch size, while
   // CopyTensorInput steps by the batch depth. Two layouts are therefore
   // supported: batchDepth == 1 with batchHeight == batchSize (dense-layer
   // case), or batchDepth == batchSize.
   size_t sampleIndex = fBatchIndex * fBatchSize;
   IndexIterator_t sampleIndexIterator = fSampleIndices.begin() + sampleIndex;

   // Stage the batch in the host buffer (backend-specialized copies) ...
   CopyTensorInput(inputHostBuffer, sampleIndexIterator);
   CopyTensorOutput(outputHostBuffer, sampleIndexIterator);
   CopyTensorWeights(weightHostBuffer, sampleIndexIterator);

   // ... then transfer the whole staged buffer to the device in one copy.
   deviceBuffer.CopyFrom(hostBuffer);

   assert(fInputLayout.size() == 3); // constructor doc: inputLayout is always 3-dimensional
   Tensor_t inputTensor = Architecture_t::CreateTensor( inputDeviceBuffer, fBatchSize, fInputLayout[0], fInputLayout[1], fInputLayout[2] );
   // In case of dense layers: re-view the same device memory as a
   // (batchSize x features) column-major matrix instead of the 4-D tensor.
   if (fBatchDepth == 1 && fBatchHeight == fBatchSize && fInputLayout[0] == 1 && fInputLayout[1] == 1){
      inputTensor = Tensor_t( inputDeviceBuffer, {fBatchSize, fInputLayout.back() }, Tensor_t::MemoryLayout::ColumnMajor );
   }

   Matrix_t outputMatrix(outputDeviceBuffer, fBatchSize, fNOutputFeatures);
   Matrix_t weightMatrix(weightDeviceBuffer, fBatchSize, 1);

   fBatchIndex++; // Advance the cycle counter for the next call.

   return TTensorBatch<Architecture_t>(inputTensor, outputMatrix, weightMatrix);
}
279 
280 //______________________________________________________________________________
/// Shuffle the epoch ordering using the caller-supplied RNG. The shuffle is
/// indirect: only fSampleIndices is permuted, the underlying data in fData is
/// never moved. Subsequent GetTensorBatch() calls read through these indices.
template <typename Data_t, typename Architecture_t>
template <typename RNG>
void TTensorDataLoader<Data_t, Architecture_t>::Shuffle(RNG & rng)
{
   std::shuffle(fSampleIndices.begin(), fSampleIndices.end(), rng);
}
287 
288 } // namespace DNN
289 } // namespace TMVA
290 
291 #endif