27 #ifndef TMVA_DNN_TENSORDATALOADER
28 #define TMVA_DNN_TENSORDATALOADER
42 std::tuple<const std::vector<TMatrixT<Double_t>> &,
const TMatrixT<Double_t> &,
const TMatrixT<Double_t> &>;
44 using TMVAInput_t = std::tuple<const std::vector<Event *> &,
const DataSetInfo &>;
45 using IndexIterator_t =
typename std::vector<size_t>::iterator;
/** TTensorBatch
 *
 *  Value type bundling the three pieces of one batch: the input tensor, the
 *  output matrix and the weight matrix. The constructor copy-constructs each
 *  member from its argument; what such a copy means (deep or shared storage)
 *  is decided by the architecture's Tensor_t / Matrix_t types.
 *
 *  \tparam Architecture_t Backend providing the nested Matrix_t and Tensor_t types.
 */
template <typename Architecture_t>
class TTensorBatch {
public:
   using Matrix_t = typename Architecture_t::Matrix_t;
   using Tensor_t = typename Architecture_t::Tensor_t;

private:
   Tensor_t fInputTensor;  ///< Input data of the batch.
   Matrix_t fOutputMatrix; ///< Output (target) values of the batch.
   Matrix_t fWeightMatrix; ///< Per-sample weights of the batch.

public:
   /** Build a batch from an input tensor, an output matrix and a weight
    *  matrix (in that order). Defined out of line. */
   TTensorBatch(Tensor_t &, Matrix_t &, Matrix_t &);
   TTensorBatch(const TTensorBatch &) = default;
   TTensorBatch(TTensorBatch &&) = default;
   TTensorBatch &operator=(const TTensorBatch &) = default;
   TTensorBatch &operator=(TTensorBatch &&) = default;

   /** Return the tensor representing the input data. */
   Tensor_t &GetInput() { return fInputTensor; }
   /** Read-only access to the input tensor, usable on a const batch. */
   const Tensor_t &GetInput() const { return fInputTensor; }

   /** Return the matrix representing the output data. */
   Matrix_t &GetOutput() { return fOutputMatrix; }
   /** Read-only access to the output matrix, usable on a const batch. */
   const Matrix_t &GetOutput() const { return fOutputMatrix; }

   /** Return the matrix holding the event weights. */
   Matrix_t &GetWeights() { return fWeightMatrix; }
   /** Read-only access to the weight matrix, usable on a const batch. */
   const Matrix_t &GetWeights() const { return fWeightMatrix; }
};
82 template <
typename Data_t,
typename Architecture_t>
83 class TTensorDataLoader;
93 template <
typename Data_t,
typename Architecture_t>
94 class TTensorBatchIterator {
96 TTensorDataLoader<Data_t, Architecture_t> &fTensorDataLoader;
100 TTensorBatchIterator(TTensorDataLoader<Data_t, Architecture_t> &tensorDataLoader,
size_t index = 0)
101 : fTensorDataLoader(tensorDataLoader), fBatchIndex(index)
106 TTensorBatch<Architecture_t> operator*() {
return fTensorDataLoader.GetTensorBatch(); }
107 TTensorBatchIterator operator++()
112 bool operator!=(
const TTensorBatchIterator &other) {
return fBatchIndex != other.fBatchIndex; }
130 template <
typename Data_t,
typename Architecture_t>
131 class TTensorDataLoader {
133 using HostBuffer_t =
typename Architecture_t::HostBuffer_t;
134 using DeviceBuffer_t =
typename Architecture_t::DeviceBuffer_t;
135 using Matrix_t =
typename Architecture_t::Matrix_t;
136 using Tensor_t =
typename Architecture_t::Tensor_t;
137 using Shape_t =
typename Architecture_t::Tensor_t::Shape_t;
138 using BatchIterator_t = TTensorBatchIterator<Data_t, Architecture_t>;
143 Shape_t fInputLayout;
147 size_t fNOutputFeatures;
152 std::vector<DeviceBuffer_t> fDeviceBuffers;
153 std::vector<HostBuffer_t> fHostBuffers;
155 std::vector<size_t> fSampleIndices;
159 TTensorDataLoader(
const Data_t &data,
size_t nSamples,
size_t batchSize,
const Shape_t & inputLayout,
160 const Shape_t & batchLayout,
size_t nOutputFeatures,
size_t nStreams = 1);
162 TTensorDataLoader(
const TTensorDataLoader &) =
default;
163 TTensorDataLoader(TTensorDataLoader &&) =
default;
164 TTensorDataLoader &operator=(
const TTensorDataLoader &) =
default;
165 TTensorDataLoader &operator=(TTensorDataLoader &&) =
default;
169 void CopyTensorInput(HostBuffer_t &buffer, IndexIterator_t begin);
172 void CopyTensorOutput(HostBuffer_t &buffer, IndexIterator_t begin);
175 void CopyTensorWeights(HostBuffer_t &buffer, IndexIterator_t begin);
177 BatchIterator_t begin() {
return TTensorBatchIterator<Data_t, Architecture_t>(*this); }
178 BatchIterator_t end() {
return TTensorBatchIterator<Data_t, Architecture_t>(*
this, fNSamples / fBatchSize); }
183 template<
typename RNG>
184 void Shuffle(RNG & rng);
189 TTensorBatch<Architecture_t> GetTensorBatch();
195 template <
typename Architecture_t>
196 TTensorBatch<Architecture_t>::TTensorBatch(Tensor_t &inputTensor, Matrix_t &outputMatrix,
197 Matrix_t &weightMatrix)
198 : fInputTensor(inputTensor), fOutputMatrix(outputMatrix), fWeightMatrix(weightMatrix)
206 template <
typename Data_t,
typename Architecture_t>
207 TTensorDataLoader<Data_t, Architecture_t>::TTensorDataLoader(
const Data_t &data,
size_t nSamples,
size_t batchSize,
208 const Shape_t & inputLayout,
const Shape_t & batchLayout,
209 size_t nOutputFeatures,
size_t nStreams)
210 : fData(data), fNSamples(nSamples), fBatchSize(batchSize), fInputLayout(inputLayout), fBatchDepth(batchLayout[0]), fBatchHeight(batchLayout[1]),
211 fBatchWidth(batchLayout[2]), fNOutputFeatures(nOutputFeatures), fBatchIndex(0), fNStreams(nStreams), fDeviceBuffers(),
212 fHostBuffers(), fSampleIndices()
214 size_t inputTensorSize = fBatchDepth * fBatchHeight * fBatchWidth;
215 size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
216 size_t weightMatrixSize = fBatchSize;
218 for (
size_t i = 0; i < fNStreams; i++) {
219 fHostBuffers.push_back(HostBuffer_t(inputTensorSize + outputMatrixSize + weightMatrixSize));
220 fDeviceBuffers.push_back(DeviceBuffer_t(inputTensorSize + outputMatrixSize + weightMatrixSize));
223 fSampleIndices.reserve(fNSamples);
224 for (
size_t i = 0; i < fNSamples; i++) {
225 fSampleIndices.push_back(i);
230 template <
typename Data_t,
typename Architecture_t>
231 TTensorBatch<Architecture_t> TTensorDataLoader<Data_t, Architecture_t>::GetTensorBatch()
233 fBatchIndex %= (fNSamples / fBatchSize);
235 size_t inputTensorSize = fBatchDepth * fBatchHeight * fBatchWidth;
236 size_t outputMatrixSize = fBatchSize * fNOutputFeatures;
237 size_t weightMatrixSize = fBatchSize;
239 size_t streamIndex = fBatchIndex % fNStreams;
240 HostBuffer_t &hostBuffer = fHostBuffers[streamIndex];
241 DeviceBuffer_t &deviceBuffer = fDeviceBuffers[streamIndex];
243 HostBuffer_t inputHostBuffer = hostBuffer.GetSubBuffer(0, inputTensorSize);
244 HostBuffer_t outputHostBuffer = hostBuffer.GetSubBuffer(inputTensorSize, outputMatrixSize);
245 HostBuffer_t weightHostBuffer = hostBuffer.GetSubBuffer(inputTensorSize + outputMatrixSize, weightMatrixSize);
247 DeviceBuffer_t inputDeviceBuffer = deviceBuffer.GetSubBuffer(0, inputTensorSize);
248 DeviceBuffer_t outputDeviceBuffer = deviceBuffer.GetSubBuffer(inputTensorSize, outputMatrixSize);
249 DeviceBuffer_t weightDeviceBuffer = deviceBuffer.GetSubBuffer(inputTensorSize + outputMatrixSize, weightMatrixSize);
255 size_t sampleIndex = fBatchIndex * fBatchSize;
256 IndexIterator_t sampleIndexIterator = fSampleIndices.begin() + sampleIndex;
258 CopyTensorInput(inputHostBuffer, sampleIndexIterator);
259 CopyTensorOutput(outputHostBuffer, sampleIndexIterator);
260 CopyTensorWeights(weightHostBuffer, sampleIndexIterator);
262 deviceBuffer.CopyFrom(hostBuffer);
264 assert(fInputLayout.size() == 3);
265 Tensor_t inputTensor = Architecture_t::CreateTensor( inputDeviceBuffer, fBatchSize, fInputLayout[0], fInputLayout[1], fInputLayout[2] );
267 if (fBatchDepth == 1 && fBatchHeight == fBatchSize && fInputLayout[0] == 1 && fInputLayout[1] == 1){
268 inputTensor = Tensor_t( inputDeviceBuffer, {fBatchSize, fInputLayout.back() }, Tensor_t::MemoryLayout::ColumnMajor );
271 Matrix_t outputMatrix(outputDeviceBuffer, fBatchSize, fNOutputFeatures);
272 Matrix_t weightMatrix(weightDeviceBuffer, fBatchSize, 1);
277 return TTensorBatch<Architecture_t>(inputTensor, outputMatrix, weightMatrix);
281 template <
typename Data_t,
typename Architecture_t>
282 template <
typename RNG>
283 void TTensorDataLoader<Data_t, Architecture_t>::Shuffle(RNG & rng)
285 std::shuffle(fSampleIndices.begin(), fSampleIndices.end(), rng);