Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
Propagation.hxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$ // Author: Simon Pfreundschuh 10/07/16
2 
3 /*************************************************************************
4  * Copyright (C) 2016, Simon Pfreundschuh *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 /////////////////////////////////////////////////////////////////////
12 // Implementation of the functions required for the forward and //
13 // backward propagation of activations through a neural network in //
14 // the reference implementation. //
15 /////////////////////////////////////////////////////////////////////
16 
18 
19 namespace TMVA {
20 namespace DNN {
21 
22 template <typename AReal>
23 void TReference<AReal>::MultiplyTranspose(TMatrixT<AReal> &output, const TMatrixT<AReal> &input,
24  const TMatrixT<AReal> &weights)
25 {
26  output.MultT(input, weights);
27 }
28 
29 template <typename AReal>
30 void TReference<AReal>::AddRowWise(TMatrixT<AReal> &output, const TMatrixT<AReal> &biases)
31 {
32  for (size_t i = 0; i < (size_t)output.GetNrows(); i++) {
33  for (size_t j = 0; j < (size_t)output.GetNcols(); j++) {
34  output(i, j) += biases(j, 0);
35  }
36  }
37 }
38 
39 template <typename AReal>
40 void TReference<AReal>::Backward(TMatrixT<AReal> &activation_gradients_backward, TMatrixT<AReal> &weight_gradients,
41  TMatrixT<AReal> &bias_gradients, TMatrixT<AReal> &df,
42  const TMatrixT<AReal> &activation_gradients, const TMatrixT<AReal> &weights,
43  const TMatrixT<AReal> &activations_backward)
44 {
45 
46  // Compute element-wise product.
47  for (size_t i = 0; i < (size_t)df.GetNrows(); i++) {
48  for (size_t j = 0; j < (size_t)df.GetNcols(); j++) {
49  df(i, j) *= activation_gradients(i, j);
50  }
51  }
52 
53  // Activation gradients.
54  if (activation_gradients_backward.GetNoElements() > 0) {
55  activation_gradients_backward.Mult(df, weights);
56  }
57 
58  // Weights gradients.
59  if (weight_gradients.GetNoElements() > 0) {
60  weight_gradients.TMult(df, activations_backward);
61  }
62 
63  // Bias gradients.
64  if (bias_gradients.GetNoElements() > 0) {
65  for (size_t j = 0; j < (size_t)df.GetNcols(); j++) {
66  AReal sum = 0.0;
67  for (size_t i = 0; i < (size_t)df.GetNrows(); i++) {
68  sum += df(i, j);
69  }
70  bias_gradients(j, 0) = sum;
71  }
72  }
73 }
74 
75 template <typename AReal>
76 void TReference<AReal>::ScaleAdd(TMatrixT<AReal> &A, const TMatrixT<AReal> &B, AReal beta)
77 {
78  for (size_t i = 0; i < (size_t)A.GetNrows(); i++) {
79  for (size_t j = 0; j < (size_t)A.GetNcols(); j++) {
80  A(i, j) += beta * B(i, j);
81  }
82  }
83 }
84 
85 template <typename AReal>
86 void TReference<AReal>::Copy(TMatrixT<AReal> &A, const TMatrixT<AReal> &B)
87 {
88  A = B;
89 }
90 
91 template <typename AReal>
92 void TReference<AReal>::ScaleAdd(std::vector<TMatrixT<AReal>> &A, const std::vector<TMatrixT<AReal>> &B, AReal beta)
93 {
94  for (size_t i = 0; i < A.size(); ++i) {
95  ScaleAdd(A[i], B[i], beta);
96  }
97 }
98 
99 template <typename AReal>
100 void TReference<AReal>::Copy(std::vector<TMatrixT<AReal>> &A, const std::vector<TMatrixT<AReal>> &B)
101 {
102  for (size_t i = 0; i < A.size(); ++i) {
103  Copy(A[i], B[i]);
104  }
105 }
106 
107 //______________________________________________________________________________
108 template <typename AReal>
109 void TReference<AReal>::Im2col(TMatrixT<AReal> &A, const TMatrixT<AReal> &B, size_t imgHeight, size_t imgWidth,
110  size_t fltHeight, size_t fltWidth, size_t strideRows, size_t strideCols,
111  size_t zeroPaddingHeight, size_t zeroPaddingWidth)
112 {
113  // image boudaries
114  int imgHeightBound = imgHeight + zeroPaddingHeight - (fltHeight - 1) / 2 - 1;
115  int imgWidthBound = imgWidth + zeroPaddingWidth - (fltWidth - 1) / 2 - 1;
116  size_t currLocalView = 0;
117 
118  // convolution centers
119  for (int i = -zeroPaddingHeight + fltHeight / 2; i <= imgHeightBound; i += strideRows) {
120  for (int j = -zeroPaddingWidth + fltWidth / 2; j <= imgWidthBound; j += strideCols) {
121  size_t currLocalViewPixel = 0;
122 
123  // within the local view
124  for (int m = 0; m < B.GetNrows(); m++) {
125  for (Int_t k = i - Int_t(fltHeight) / 2; k <= i + (Int_t(fltHeight) - 1) / 2; k++) {
126  for (Int_t l = j - Int_t(fltWidth) / 2; l <= j + (Int_t(fltWidth) - 1) / 2; l++) {
127 
128  // Check the boundaries
129  if (k < 0 || k >= Int_t(imgHeight) || l < 0 || l >= Int_t(imgWidth))
130  A(currLocalView, currLocalViewPixel++) = 0;
131  else
132  A(currLocalView, currLocalViewPixel++) = B(m, k * imgWidth + l);
133  }
134  }
135  }
136 
137  currLocalView++;
138  }
139  }
140 }
141 
142 //______________________________________________________________________________
143 template <typename AReal>
144 void TReference<AReal>::RotateWeights(TMatrixT<AReal> &A, const TMatrixT<AReal> &B, size_t filterDepth,
145  size_t filterHeight, size_t filterWidth, size_t numFilters)
146 {
147  size_t jump = filterHeight * filterWidth;
148  for (size_t j = 0; j < filterDepth; j++) {
149  for (size_t k = 0; k < numFilters; k++) {
150  for (size_t i = 0; i < jump; i++) {
151  A(j, k * jump + i) = B(k, ((j + 1) * jump - 1) - i);
152  }
153  }
154  }
155 }
156 
157 //______________________________________________________________________________
158 template <typename AReal>
159 void TReference<AReal>::AddConvBiases(TMatrixT<AReal> &output, const TMatrixT<AReal> &biases)
160 {
161  for (size_t i = 0; i < (size_t)output.GetNrows(); i++) {
162  for (size_t j = 0; j < (size_t)output.GetNcols(); j++) {
163  output(i, j) += biases(i, 0);
164  }
165  }
166 }
167 
168 #ifdef HAVE_CNN_REFERENCE
169 //______________________________________________________________________________
170 template <typename AReal>
171 void TReference<AReal>::ConvLayerBackward(std::vector<TMatrixT<AReal>> &activation_gradients_backward,
172  TMatrixT<AReal> &weight_gradients, TMatrixT<AReal> &bias_gradients,
173  std::vector<TMatrixT<AReal>> &df,
174  const std::vector<TMatrixT<AReal>> &activation_gradients,
175  const TMatrixT<AReal> &weights,
176  const std::vector<TMatrixT<AReal>> &activations_backward, size_t batchSize,
177  size_t inputHeight, size_t inputWidth, size_t depth, size_t height,
178  size_t width, size_t filterDepth, size_t filterHeight, size_t filterWidth,
179  size_t nLocalViews)
180 {
181 
182  // Update derivatives
183  size_t m, n;
184  m = activation_gradients[0].GetNrows();
185  n = activation_gradients[0].GetNcols();
186 
187  for (size_t i = 0; i < batchSize; i++) {
188  for (size_t j = 0; j < (size_t)m; j++) {
189  for (size_t k = 0; k < (size_t)n; k++) {
190  df[i](j, k) *= activation_gradients[i](j, k);
191  }
192  }
193  }
194 
195  // Calculate the activation gradients of the previous layer
196  CalculateConvActivationGradients(activation_gradients_backward, df, weights, batchSize, inputHeight, inputWidth,
197  depth, height, width, filterDepth, filterHeight, filterWidth);
198 
199  // Calculate the weight gradients
200  CalculateConvWeightGradients(weight_gradients, df, activations_backward, batchSize, inputHeight, inputWidth, depth,
201  height, width, filterDepth, filterHeight, filterWidth, nLocalViews);
202 
203  // Calculate the bias gradients
204  CalculateConvBiasGradients(bias_gradients, df, batchSize, depth, nLocalViews);
205 }
206 
207 //______________________________________________________________________________
208 template <typename AReal>
209 void TReference<AReal>::CalculateConvActivationGradients(std::vector<TMatrixT<AReal>> &activation_gradients_backward,
210  const std::vector<TMatrixT<AReal>> &df,
211  const TMatrixT<AReal> &weights, size_t batchSize,
212  size_t inputHeight, size_t inputWidth, size_t depth,
213  size_t height, size_t width, size_t filterDepth,
214  size_t filterHeight, size_t filterWidth)
215 {
216 
217  if (activation_gradients_backward.size() == 0) return;
218  // need to implement
219  // Transform the weights
220  TMatrixT<AReal> rotWeights(filterDepth, depth * filterHeight * filterWidth);
221  RotateWeights(rotWeights, weights, filterDepth, filterHeight, filterWidth, weights.GetNrows());
222 
223  // Calculate the zero paddings
224  size_t tempZeroPaddingHeight = (size_t)(floor((inputHeight - height + filterHeight - 1) / 2));
225  size_t tempZeroPaddingWidth = (size_t)(floor((inputWidth - width + filterWidth - 1) / 2));
226 
227  // Calculate the number of local views and the number of pixles in each view
228  size_t tempNLocalViews = inputHeight * inputWidth;
229  size_t tempNLocalViewPixels = depth * filterHeight * filterWidth;
230 
231  size_t tempStrideRows = 1;
232  size_t tempStrideCols = 1;
233 
234  // An entire convolution follows
235  for (size_t i = 0; i < batchSize; i++) {
236  TMatrixT<AReal> dfTr(tempNLocalViews, tempNLocalViewPixels);
237  Im2col(dfTr, df[i], inputHeight, inputWidth, filterHeight, filterWidth, tempStrideRows, tempStrideCols,
238  tempZeroPaddingHeight, tempZeroPaddingWidth);
239 
240  activation_gradients_backward[i].MultT(rotWeights, dfTr);
241  }
242 
243  return ;
244 }
245 
246 //______________________________________________________________________________
247 template <typename AReal>
248 void TReference<AReal>::CalculateConvWeightGradients(TMatrixT<AReal> &weight_gradients,
249  const std::vector<TMatrixT<AReal>> &df,
250  const std::vector<TMatrixT<AReal>> &activations_backward,
251  size_t batchSize, size_t inputHeight, size_t inputWidth,
252  size_t depth, size_t height, size_t width, size_t filterDepth,
253  size_t filterHeight, size_t filterWidth, size_t nLocalViews)
254 {
255 
256  // reinitialize the weight gradients to 0
257  for (Int_t i = 0; i < weight_gradients.GetNrows(); i++) {
258  for (Int_t j = 0; j < weight_gradients.GetNcols(); j++) {
259  weight_gradients(i, j) = 0;
260  }
261  }
262  for (size_t i = 0; i < batchSize; i++) {
263  // Calculate the zero paddings
264  size_t tempZeroPaddingHeight = (filterHeight - height + inputHeight - 1) / 2;
265  size_t tempZeroPaddingWidth = (filterWidth - width + inputWidth - 1) / 2;
266 
267  size_t tempNLocalViews = filterHeight * filterWidth;
268  size_t tempNLocalViewPixels = inputHeight * inputWidth;
269 
270  size_t tempStrideRows = 1;
271  size_t tempStrideCols = 1;
272 
273  for (size_t j = 0; j < depth; j++) {
274 
275  // row matrix
276  TMatrixT<AReal> rowDelta(1, nLocalViews);
277  for (size_t k = 0; k < nLocalViews; k++) {
278  rowDelta(0, k) = df[i](j, k);
279  }
280 
281  // convolution
282  TMatrixT<AReal> res(filterDepth, filterHeight * filterWidth);
283 
284  TMatrixT<AReal> rowDeltaTr(tempNLocalViews, tempNLocalViewPixels);
285  Im2col(rowDeltaTr, rowDelta, height, width, inputHeight, inputWidth, tempStrideRows, tempStrideCols,
286  tempZeroPaddingHeight, tempZeroPaddingWidth);
287 
288  res.MultT(activations_backward[i], rowDeltaTr);
289 
290  for (size_t k = 0; k < filterDepth; k++) {
291  for (size_t l = 0; l < filterHeight * filterWidth; l++) {
292  weight_gradients(j, k * (filterHeight * filterWidth) + l) += res(k, (tempNLocalViews - 1) - l);
293  }
294  }
295  }
296  }
297 #if 0
298  // to remove warning
299  (void)weight_gradients;
300  (void)df;
301  (void)activations_backward;
302  (void) batchSize;
303  (void) inputHeight;
304  (void)inputWidth;
305  (void)depth;
306  (void)height;
307  (void) width;
308  (void)filterDepth;
309  (void)filterHeight;
310  (void)filterWidth;
311  (void)nLocalViews;
312 #endif
313 }
314 
315 //______________________________________________________________________________
316 template <typename AReal>
317 void TReference<AReal>::CalculateConvBiasGradients(TMatrixT<AReal> &bias_gradients, const std::vector<TMatrixT<AReal>> &df,
318  size_t batchSize, size_t depth, size_t nLocalViews)
319 {
320  for (size_t i = 0; i < depth; i++) {
321  AReal sum = 0;
322  for (size_t j = 0; j < nLocalViews; j++) {
323  for (size_t k = 0; k < batchSize; k++) {
324  sum += df[k](i, j);
325  }
326  }
327  bias_gradients(i, 0) = sum;
328  }
329 }
330 #endif
331 
332 //______________________________________________________________________________
333 template <typename AReal>
334 void TReference<AReal>::Downsample(TMatrixT<AReal> &A, TMatrixT<AReal> &B, const TMatrixT<AReal> &C, size_t imgHeight,
335  size_t imgWidth, size_t fltHeight, size_t fltWidth, size_t strideRows,
336  size_t strideCols)
337 {
338  // image boudaries
339  int imgHeightBound = imgHeight - (fltHeight - 1) / 2 - 1;
340  int imgWidthBound = imgWidth - (fltWidth - 1) / 2 - 1;
341  size_t currLocalView = 0;
342 
343  // centers
344  for (int i = fltHeight / 2; i <= imgHeightBound; i += strideRows) {
345  for (int j = fltWidth / 2; j <= imgWidthBound; j += strideCols) {
346  // within local views
347  for (int m = 0; m < C.GetNrows(); m++) {
348  AReal value = -std::numeric_limits<AReal>::max();
349 
350  for (int k = i - Int_t(fltHeight) / 2; k <= i + (Int_t(fltHeight) - 1) / 2; k++) {
351  for (int l = j - Int_t(fltWidth) / 2; l <= j + (Int_t(fltWidth) - 1) / 2; l++) {
352  if (C(m, k * imgWidth + l) > value) {
353  value = C(m, k * imgWidth + l);
354  B(m, currLocalView) = k * imgWidth + l;
355  }
356  }
357  }
358  A(m, currLocalView) = value;
359  }
360  currLocalView++;
361  }
362  }
363 }
364 
365 //______________________________________________________________________________
366 template <typename AReal>
367 void TReference<AReal>::MaxPoolLayerBackward(TMatrixT<AReal> &activationGradientsBackward,
368  const TMatrixT<AReal> &activationGradients,
369  const TMatrixT<AReal> &indexMatrix,
370  size_t /* imgHeight */, size_t /* imgWidth */, size_t /* fltHeight */,
371  size_t /* fltWidth */, size_t /* strideRows */, size_t /* strideCols */,
372  size_t nLocalViews)
373 {
374  size_t depth = activationGradientsBackward.GetNrows();
375 
376  for (size_t j = 0; j < depth; j++) {
377  // initialize to zeros
378  for (size_t t = 0; t < (size_t)activationGradientsBackward.GetNcols(); t++) {
379  activationGradientsBackward[j][t] = 0;
380  }
381 
382  // set values
383  for (size_t k = 0; k < nLocalViews; k++) {
384  AReal grad = activationGradients[j][k];
385  size_t winningIdx = indexMatrix[j][k];
386  activationGradientsBackward[j][winningIdx] += grad;
387  }
388  }
389 }
390 
391 //______________________________________________________________________________
392 template <typename AReal>
393 void TReference<AReal>::Reshape(TMatrixT<AReal> &A, const TMatrixT<AReal> &B)
394 {
395  auto nColsA = A.GetNcols();
396  auto nColsB = B.GetNcols();
397 
398  for (Int_t i = 0; i < A.GetNrows(); i++) {
399  for (Int_t j = 0; j < A.GetNcols(); j++) {
400  auto nElem = i * nColsA + j;
401  A(i, j) = B(nElem / nColsB, nElem % nColsB);
402  }
403  }
404 }
405 
406 //______________________________________________________________________________
407 template <typename AReal>
408 void TReference<AReal>::Flatten(TMatrixT<AReal> &A, const std::vector<TMatrixT<AReal>> &B, size_t size, size_t nRows,
409  size_t nCols)
410 {
411  for (size_t i = 0; i < (size_t)size; i++) {
412  for (size_t j = 0; j < (size_t)nRows; j++) {
413  for (size_t k = 0; k < (size_t)nCols; k++) {
414  A(i, j * nCols + k) = B[i](j, k);
415  }
416  }
417  }
418 }
419 
420 //______________________________________________________________________________
421 template <typename AReal>
422 void TReference<AReal>::Deflatten(std::vector<TMatrixT<AReal>> &A, const TMatrixT<AReal> &B, size_t size, size_t nRows,
423  size_t nCols)
424 {
425  for (size_t i = 0; i < (size_t)size; i++) {
426  for (size_t j = 0; j < (size_t)nRows; j++) {
427  for (size_t k = 0; k < (size_t)nCols; k++) {
428  A[i](j, k) = B(i, j * nCols + k);
429  }
430  }
431  }
432 }
433 
434 //______________________________________________________________________________
435 template <typename AReal>
436 void TReference<AReal>::Rearrange(std::vector<TMatrixT<AReal>> &out, const std::vector<TMatrixT<AReal>> &in)
437 {
438  // B x T x D out --- T x B x D in*/
439  auto B = out.size();
440  auto T = out[0].GetNrows();
441  auto D = out[0].GetNcols();
442  if ((T != (Int_t)in.size()) || (Int_t(B) != in[0].GetNrows()) || (D != in[0].GetNcols())) {
443  std::cout << "Incompatible Dimensions\n"
444  << in.size() << "x" << in[0].GetNrows() << "x" << in[0].GetNcols() << " --> " << B << "x" << T << "x"
445  << D << "\n";
446  return;
447  }
448  for (size_t i = 0; i < B; ++i) {
449  for (Int_t j = 0; j < T; ++j) {
450  for (Int_t k = 0; k < D; ++k) {
451  out[i](j, k) = in[j](i, k);
452  }
453  }
454  }
455  return;
456 }
457 
458 } // namespace DNN
459 } // namespace TMVA