18 #include "cuda_runtime.h"
29 void TTensorDataLoader<TensorInput, TCudnn<float> >::CopyTensorInput(TCudaHostBuffer<float> &buffer,
30 IndexIterator_t sampleIterator)
32 const std::vector<TMatrixT<Double_t> > &inputTensor = std::get<0>(fData);
34 if (fBatchDepth == 1) {
35 for (
size_t i = 0; i < fBatchHeight; i++) {
36 size_t sampleIndex = *sampleIterator;
37 for (
size_t j = 0; j < fBatchWidth; j++) {
38 size_t bufferIndex = j * fBatchHeight + i;
39 buffer[bufferIndex] =
static_cast<float>(inputTensor[0](sampleIndex, j));
44 for (
size_t i = 0; i < fBatchDepth; i++) {
45 size_t sampleIndex = *sampleIterator;
46 for (
size_t j = 0; j < fBatchHeight; j++) {
47 for (
size_t k = 0; k < fBatchWidth; k++) {
48 size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + j;
49 buffer[bufferIndex] =
static_cast<float>(inputTensor[sampleIndex](j, k));
59 void TTensorDataLoader<TensorInput, TCudnn<float> >::CopyTensorOutput(TCudaHostBuffer<float> &buffer,
60 IndexIterator_t sampleIterator)
62 const TMatrixT<Double_t> &outputMatrix = std::get<1>(fData);
63 size_t n = outputMatrix.GetNcols();
65 for (
size_t i = 0; i < fBatchSize; i++) {
66 size_t sampleIndex = *sampleIterator;
67 for (
size_t j = 0; j < n; j++) {
68 size_t bufferIndex = j * fBatchSize + i;
69 buffer[bufferIndex] =
static_cast<float>(outputMatrix(sampleIndex, j));
77 void TTensorDataLoader<TensorInput, TCudnn<float> >::CopyTensorWeights(TCudaHostBuffer<float> &buffer,
78 IndexIterator_t sampleIterator)
80 const TMatrixT<Double_t> &weightMatrix = std::get<2>(fData);
82 for (
size_t i = 0; i < fBatchSize; i++) {
83 buffer[i] =
static_cast<float>(weightMatrix(*sampleIterator, 0));
90 void TTensorDataLoader<TMVAInput_t, TCudnn<float> >::CopyTensorInput(TCudaHostBuffer<float> &buffer,
91 IndexIterator_t sampleIterator)
95 if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
96 for (
size_t i = 0; i < fBatchHeight; i++) {
97 size_t sampleIndex = *sampleIterator;
98 Event *
event = std::get<0>(fData)[sampleIndex];
99 for (
size_t j = 0; j < fBatchWidth; j++) {
100 size_t bufferIndex = j * fBatchHeight + i;
101 buffer[bufferIndex] =
event->GetValue(j);
106 }
else if (fBatchDepth == fBatchSize) {
107 for (
size_t i = 0; i < fBatchSize; i++) {
108 size_t sampleIndex = *sampleIterator;
109 Event *
event = std::get<0>(fData)[sampleIndex];
110 for (
size_t j = 0; j < fBatchHeight; j++) {
111 for (
size_t k = 0; k < fBatchWidth; k++) {
113 size_t bufferIndex = i * fBatchHeight * fBatchWidth + j * fBatchWidth + k;
114 buffer[bufferIndex] =
event->GetValue(j * fBatchWidth + k);
121 std::cout << fBatchDepth << fBatchSize << fBatchHeight << std::endl;
122 Error(
"TTensorDataLoader",
"Inconsistency between batch depth and batch size");
128 void TTensorDataLoader<TMVAInput_t, TCudnn<float> >::CopyTensorOutput(TCudaHostBuffer<float> &buffer,
129 IndexIterator_t sampleIterator)
131 const DataSetInfo &info = std::get<1>(fData);
132 size_t n = buffer.GetSize() / fBatchSize;
135 for (
size_t i = 0; i < fBatchSize; i++) {
136 size_t sampleIndex = *sampleIterator++;
137 Event *
event = std::get<0>(fData)[sampleIndex];
138 for (
size_t j = 0; j < n; j++) {
140 size_t bufferIndex = j * fBatchSize + i;
142 if (event->GetNTargets() == 0) {
145 buffer[bufferIndex] = (info.IsSignal(event)) ? 1.0 : 0.0;
148 buffer[bufferIndex] = 0.0;
149 if (j == event->GetClass()) {
150 buffer[bufferIndex] = 1.0;
154 buffer[bufferIndex] =
static_cast<Float_t
>(
event->GetTarget(j));
162 void TTensorDataLoader<TMVAInput_t, TCudnn<float> >::CopyTensorWeights(TCudaHostBuffer<float> &buffer,
163 IndexIterator_t sampleIterator)
165 for (
size_t i = 0; i < fBatchSize; i++) {
166 size_t sampleIndex = *sampleIterator++;
167 Event *
event = std::get<0>(fData)[sampleIndex];
168 buffer[i] =
event->GetWeight();
174 void TTensorDataLoader<TensorInput, TCudnn<double> >::CopyTensorInput(TCudaHostBuffer<double> &buffer,
175 IndexIterator_t sampleIterator)
177 const std::vector<TMatrixT<Double_t> > &inputTensor = std::get<0>(fData);
179 if (fBatchDepth == 1) {
180 for (
size_t i = 0; i < fBatchHeight; i++) {
181 size_t sampleIndex = *sampleIterator;
182 for (
size_t j = 0; j < fBatchWidth; j++) {
183 size_t bufferIndex = j * fBatchHeight + i;
184 buffer[bufferIndex] =
static_cast<double>(inputTensor[0](sampleIndex, j));
189 for (
size_t i = 0; i < fBatchDepth; i++) {
190 size_t sampleIndex = *sampleIterator;
191 for (
size_t j = 0; j < fBatchHeight; j++) {
192 for (
size_t k = 0; k < fBatchWidth; k++) {
193 size_t bufferIndex = i * fBatchHeight * fBatchWidth + k * fBatchHeight + j;
194 buffer[bufferIndex] =
static_cast<double>(inputTensor[sampleIndex](j, k));
204 void TTensorDataLoader<TensorInput, TCudnn<double> >::CopyTensorOutput(TCudaHostBuffer<double> &buffer,
205 IndexIterator_t sampleIterator)
207 const TMatrixT<Double_t> &outputMatrix = std::get<1>(fData);
208 size_t n = outputMatrix.GetNcols();
210 for (
size_t i = 0; i < fBatchSize; i++) {
211 size_t sampleIndex = *sampleIterator;
212 for (
size_t j = 0; j < n; j++) {
213 size_t bufferIndex = j * fBatchSize + i;
214 buffer[bufferIndex] = outputMatrix(sampleIndex, j);
222 void TTensorDataLoader<TensorInput, TCudnn<double> >::CopyTensorWeights(TCudaHostBuffer<double> &buffer,
223 IndexIterator_t sampleIterator)
225 const TMatrixT<Double_t> &weightMatrix = std::get<2>(fData);
226 for (
size_t i = 0; i < fBatchSize; i++) {
227 buffer[i] = weightMatrix(*sampleIterator, 0);
234 void TTensorDataLoader<TMVAInput_t, TCudnn<double> >::CopyTensorInput(TCudaHostBuffer<double> &buffer,
235 IndexIterator_t sampleIterator)
238 if (fBatchDepth == 1 && fBatchHeight == fBatchSize) {
239 for (
size_t i = 0; i < fBatchHeight; i++) {
240 size_t sampleIndex = *sampleIterator;
241 Event *
event = std::get<0>(fData)[sampleIndex];
242 for (
size_t j = 0; j < fBatchWidth; j++) {
243 size_t bufferIndex = j * fBatchHeight + i;
244 buffer[bufferIndex] =
event->GetValue(j);
248 }
else if (fBatchDepth == fBatchSize) {
250 for (
size_t i = 0; i < fBatchDepth; i++) {
251 size_t sampleIndex = *sampleIterator;
252 Event *
event = std::get<0>(fData)[sampleIndex];
253 for (
size_t j = 0; j < fBatchHeight; j++) {
254 for (
size_t k = 0; k < fBatchWidth; k++) {
256 size_t bufferIndex = i * fBatchHeight * fBatchWidth + j * fBatchWidth + k;
257 buffer[bufferIndex] =
event->GetValue(j * fBatchWidth + k);
264 Error(
"TTensorDataLoader",
"Inconsistency between batch depth and batch size");
271 void TTensorDataLoader<TMVAInput_t, TCudnn<double> >::CopyTensorOutput(TCudaHostBuffer<double> &buffer,
272 IndexIterator_t sampleIterator)
274 const DataSetInfo &info = std::get<1>(fData);
275 size_t n = buffer.GetSize() / fBatchSize;
279 for (
size_t i = 0; i < fBatchSize; i++) {
280 size_t sampleIndex = *sampleIterator++;
281 Event *
event = std::get<0>(fData)[sampleIndex];
282 for (
size_t j = 0; j < n; j++) {
284 size_t bufferIndex = j * fBatchSize + i;
286 if (event->GetNTargets() == 0) {
289 buffer[bufferIndex] = (info.IsSignal(event)) ? 1.0 : 0.0;
292 buffer[bufferIndex] = 0.0;
293 if (j == event->GetClass()) {
294 buffer[bufferIndex] = 1.0;
298 buffer[bufferIndex] =
static_cast<Double_t
>(
event->GetTarget(j));
306 void TTensorDataLoader<TMVAInput_t, TCudnn<double> >::CopyTensorWeights(TCudaHostBuffer<double> &buffer,
307 IndexIterator_t sampleIterator)
309 for (
size_t i = 0; i < fBatchSize; i++) {
310 size_t sampleIndex = *sampleIterator++;
311 Event *
event = std::get<0>(fData)[sampleIndex];
312 buffer[i] =
event->GetWeight();
319 TTensorBatch<TCudnn<float> > TTensorDataLoader<TensorInput, TCudnn<float> >::GetTensorBatch()
322 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();
324 std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
325 std::vector<size_t> wheightShape {fBatchSize, 1, 1, 1};
326 std::vector<TCudaTensor<float> > inputTensor(1, TCudaTensor<float>(std::get<0>(DeviceBuffers),
327 this->GetTensorDim(), fInputShape));
328 TCudaTensor<float> outputMatrix(std::get<1>(DeviceBuffers), this->GetTensorDim(), outputShape);
329 TCudaTensor<float> weightMatrix(std::get<2>(DeviceBuffers), this->GetTensorDim(), wheightShape);
332 return TTensorBatch<TCudnn<float> >(inputTensor, outputMatrix, weightMatrix);
337 TTensorBatch<TCudnn<double> > TTensorDataLoader<TensorInput, TCudnn<double> >::GetTensorBatch()
340 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();
342 std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
343 std::vector<size_t> wheightShape {fBatchSize, 1, 1, 1};
344 std::vector<TCudaTensor<double> > inputTensor(1, TCudaTensor<double>(std::get<0>(DeviceBuffers),
345 this->GetTensorDim(), fInputShape));
346 TCudaTensor<double> outputMatrix(std::get<1>(DeviceBuffers), this->GetTensorDim(), outputShape);
347 TCudaTensor<double> weightMatrix(std::get<2>(DeviceBuffers), this->GetTensorDim(), wheightShape);
350 return TTensorBatch<TCudnn<double> >(inputTensor, outputMatrix, weightMatrix);
355 TTensorBatch<TCudnn<float> > TTensorDataLoader<TMVAInput_t, TCudnn<float> >::GetTensorBatch()
358 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();
360 std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
361 std::vector<size_t> wheightShape {fBatchSize, 1, 1, 1};
362 std::vector<TCudaTensor<float> > inputTensor(1, TCudaTensor<float>(std::get<0>(DeviceBuffers),
363 this->GetTensorDim(), fInputShape));
364 TCudaTensor<float> outputMatrix(std::get<1>(DeviceBuffers), this->GetTensorDim(), outputShape);
365 TCudaTensor<float> weightMatrix(std::get<2>(DeviceBuffers), this->GetTensorDim(), wheightShape);
368 return TTensorBatch<TCudnn<float> >(inputTensor, outputMatrix, weightMatrix);
373 TTensorBatch<TCudnn<double> > TTensorDataLoader<TMVAInput_t, TCudnn<double> >::GetTensorBatch()
376 DeviceBufferTuple DeviceBuffers = CopyTensorBatches();
378 std::vector<size_t> outputShape {fBatchSize, 1, fNOutputFeatures, 1};
379 std::vector<size_t> wheightShape {fBatchSize, 1, 1, 1};
380 std::vector<TCudaTensor<double> > inputTensor(1, TCudaTensor<double>(std::get<0>(DeviceBuffers),
381 this->GetTensorDim(), fInputShape));
382 TCudaTensor<double> outputMatrix(std::get<1>(DeviceBuffers), fNOutputFeatures + 2, outputShape);
383 TCudaTensor<double> weightMatrix(std::get<2>(DeviceBuffers), 3, wheightShape);
386 return TTensorBatch<TCudnn<double> >(inputTensor, outputMatrix, weightMatrix);
394 template class TTensorDataLoader<TensorInput, TCudnn<float> >;
395 template class TTensorDataLoader<TMVAInput_t, TCudnn<float> >;
396 template class TTensorDataLoader<TensorInput, TCudnn<double> >;
397 template class TTensorDataLoader<TMVAInput_t, TCudnn<double> >;