Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
Arithmetic.hxx
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Author: Simon Pfreundschuh 20/07/16
3 
4 /*************************************************************************
5  * Copyright (C) 2016, Simon Pfreundschuh *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 ////////////////////////////////////////////////////////////
13 // Implementation of Helper arithmetic functions for the //
14 // multi-threaded CPU implementation of DNNs. //
15 ////////////////////////////////////////////////////////////
16 
18 
19 #ifdef R__HAS_TMVACPU
21 #else
23 #endif
24 
25 #pragma GCC diagnostic push
26 #pragma GCC diagnostic ignored "-Wshadow"
27 
28 //#include "tbb/tbb.h"
29 
30 #pragma GCC diagnostic pop
31 
32 namespace TMVA
33 {
34 namespace DNN
35 {
36 
37 //____________________________________________________________________________
38 template<typename AReal>
39 void TCpu<AReal>::Multiply(TCpuMatrix<AReal> &C,
40  const TCpuMatrix<AReal> &A,
41  const TCpuMatrix<AReal> &B)
42 {
43  int m = (int) A.GetNrows();
44  int k = (int) A.GetNcols();
45  int n = (int) B.GetNcols();
46 
47  R__ASSERT((int) C.GetNrows() == m);
48  R__ASSERT((int) C.GetNcols() == n);
49  R__ASSERT((int) B.GetNrows() == k);
50 
51 #ifdef R__HAS_TMVACPU
52 
53  char transa = 'N';
54  char transb = 'N';
55 
56  AReal alpha = 1.0;
57  AReal beta = 0.0;
58 
59  const AReal * APointer = A.GetRawDataPointer();
60  const AReal * BPointer = B.GetRawDataPointer();
61  AReal * CPointer = C.GetRawDataPointer();
62 
63  ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
64  APointer, &m, BPointer, &k, &beta, CPointer, &m);
65 #else
66  TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
67  tmp.Mult(A,B);
68  C = tmp;
69 #endif
70 }
71 
72 //____________________________________________________________________________
73 template<typename AReal>
74 void TCpu<AReal>::TransposeMultiply(TCpuMatrix<AReal> &C,
75  const TCpuMatrix<AReal> &A,
76  const TCpuMatrix<AReal> &B,
77  AReal alpha, AReal beta)
78 {
79 #ifdef R__HAS_TMVACPU
80  int m = (int) A.GetNcols();
81  int k = (int) A.GetNrows();
82  int n = (int) B.GetNcols();
83 
84  R__ASSERT((int) C.GetNrows() == m);
85  R__ASSERT((int) C.GetNcols() == n);
86  R__ASSERT((int) B.GetNrows() == k);
87 
88  char transa = 'T';
89  char transb = 'N';
90 
91  //AReal alpha = 1.0;
92  //AReal beta = 0.0;
93 
94  const AReal *APointer = A.GetRawDataPointer();
95  const AReal *BPointer = B.GetRawDataPointer();
96  AReal *CPointer = C.GetRawDataPointer();
97 
98  ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
99  APointer, &k, BPointer, &k, &beta, CPointer, &m);
100 #else
101  TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
102  tmp.TMult(A,B);
103  tmp = alpha*tmp + beta;
104  C = tmp;
105 #endif
106 }
107 
108 //____________________________________________________________________________
109 template<typename AReal>
110 void TCpu<AReal>::Hadamard(TCpuMatrix<AReal> &B,
111  const TCpuMatrix<AReal> &A)
112 {
113  const AReal *dataA = A.GetRawDataPointer();
114  AReal *dataB = B.GetRawDataPointer();
115 
116  size_t nElements = A.GetNoElements();
117  R__ASSERT(B.GetNoElements() == nElements);
118  size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
119 
120  auto f = [&](UInt_t workerID)
121  {
122  for (size_t j = 0; j < nSteps; ++j) {
123  size_t idx = workerID+j;
124  if (idx >= nElements) break;
125  dataB[idx] *= dataA[idx];
126  }
127  return 0;
128  };
129 
130  if (nSteps < nElements) {
131 #ifdef DL_USE_MTE
132  B.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps));
133 #else
134  for (size_t i = 0; i < nElements ; i+= nSteps)
135  f(i);
136 #endif
137  }
138  else {
139  f(0);
140  }
141 }
142 
143 //____________________________________________________________________________
144 template<typename AReal>
145 void TCpu<AReal>::Hadamard(TCpuTensor<AReal> &B,
146  const TCpuTensor<AReal> &A)
147 {
148  const AReal *dataA = A.GetRawDataPointer();
149  AReal *dataB = B.GetRawDataPointer();
150 
151  size_t nElements = A.GetNoElements();
152  R__ASSERT(B.GetNoElements() == nElements);
153  size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
154 
155  auto f = [&](UInt_t workerID)
156  {
157  for (size_t j = 0; j < nSteps; ++j) {
158  size_t idx = workerID+j;
159  if (idx >= nElements) break;
160  dataB[idx] *= dataA[idx];
161  }
162  return 0;
163  };
164 
165  if (nSteps < nElements) {
166 #ifdef DL_USE_MTE
167  TMVA::Config::Instance().GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps));
168 #else
169  for (size_t i = 0; i < nElements ; i+= nSteps)
170  f(i);
171 #endif
172  }
173  else {
174  f(0);
175  }
176 }
177 
178 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
179 /// \brief Checks two matrices for element-wise equality.
180 /// \tparam AReal An architecture-specific floating point number type.
181 /// \param A The first matrix.
182 /// \param B The second matrix.
183 /// \param epsilon Equality tolerance, needed to address floating point arithmetic.
184 /// \return Whether the two matrices can be considered equal element-wise
185 ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
186 template<typename AReal>
187 bool TCpu<AReal>::AlmostEquals(const TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> &B, double epsilon)
188 {
189  if (A.GetNrows() != B.GetNrows() || A.GetNcols() != B.GetNcols()) {
190  Fatal("AlmostEquals", "The passed matrices have unequal shapes.");
191  }
192 
193  const AReal *dataA = A.GetRawDataPointer();
194  const AReal *dataB = B.GetRawDataPointer();
195  size_t nElements = A.GetNoElements();
196 
197  for(size_t i = 0; i < nElements; i++) {
198  if(fabs(dataA[i] - dataB[i]) > epsilon) return false;
199  }
200  return true;
201 }
202 
203 //____________________________________________________________________________
204 template<typename AReal>
205 void TCpu<AReal>::SumColumns(TCpuMatrix<AReal> &B,
206  const TCpuMatrix<AReal> &A,
207  AReal alpha, AReal beta)
208 {
209 #ifdef R__HAS_TMVACPU
210  int m = (int) A.GetNrows();
211  int n = (int) A.GetNcols();
212  int inc = 1;
213 
214  // AReal alpha = 1.0;
215  //AReal beta = 0.0;
216  char trans = 'T';
217 
218  const AReal * APointer = A.GetRawDataPointer();
219  AReal * BPointer = B.GetRawDataPointer();
220 
221  ::TMVA::DNN::Blas::Gemv(&trans, &m, &n, &alpha, APointer, &m,
222  TCpuMatrix<AReal>::GetOnePointer(), &inc,
223  &beta, BPointer, &inc);
224 #else
225  TMatrixT<AReal> tmp(B.GetNrows(), B.GetNcols());
226  TReference<AReal>::SumColumns(tmp,A);
227  tmp = alpha*tmp + beta;
228  B = tmp;
229 #endif
230 }
231 
232 //____________________________________________________________________________
233 template<typename AReal>
234 void TCpu<AReal>::ScaleAdd(TCpuMatrix<AReal> &B,
235  const TCpuMatrix<AReal> &A,
236  AReal alpha)
237 {
238 #ifdef R__HAS_TMVACPU
239  int n = (int) (A.GetNcols() * A.GetNrows());
240  int inc = 1;
241 
242  const AReal *x = A.GetRawDataPointer();
243  AReal *y = B.GetRawDataPointer();
244 
245  ::TMVA::DNN::Blas::Axpy(&n, &alpha, x, &inc, y, &inc);
246 #else
247  TMatrixT<AReal> tmp;
248  TReference<AReal>::ScaleAdd(tmp, A, alpha);
249  B = tmp;
250 #endif
251 }
252 
253 //____________________________________________________________________________
254 template<typename AReal>
255 void TCpu<AReal>::Copy(TCpuMatrix<AReal> &B,
256  const TCpuMatrix<AReal> &A)
257 {
258  auto f = [](AReal x) {return x;};
259  B.MapFrom(f, A);
260 }
261 
262 
263 //____________________________________________________________________________
264 template<typename AReal>
265 void TCpu<AReal>::ScaleAdd(TCpuTensor<AReal> &B,
266  const TCpuTensor<AReal> &A,
267  AReal alpha)
268 {
269  // should re-implemented at tensor level
270  for (size_t i = 0; i < B.GetFirstSize(); ++i) {
271  TCpuMatrix<AReal> B_m = B.At(i).GetMatrix();
272  ScaleAdd(B_m, A.At(i).GetMatrix(), alpha);
273  }
274 }
275 
276 //____________________________________________________________________________
277 template<typename AReal>
278 void TCpu<AReal>::Copy(TCpuTensor<AReal> &B,
279  const TCpuTensor<AReal> &A)
280 {
281 
282  auto f = [](AReal x) {return x;};
283  B.MapFrom(f, A);
284 }
285 
286 //____________________________________________________________________________
287 template <typename AReal>
288 void TCpu<AReal>::ConstAdd(TCpuMatrix<AReal> &A, AReal beta)
289 {
290  auto f = [beta](AReal x) { return x + beta; };
291  A.Map(f);
292 }
293 
294 //____________________________________________________________________________
295 template <typename AReal>
296 void TCpu<AReal>::ConstMult(TCpuMatrix<AReal> &A, AReal beta)
297 {
298  auto f = [beta](AReal x) { return x * beta; };
299  A.Map(f);
300 }
301 
302 //____________________________________________________________________________
303 template <typename AReal>
304 void TCpu<AReal>::ReciprocalElementWise(TCpuMatrix<AReal> &A)
305 {
306  auto f = [](AReal x) { return 1.0 / x; };
307  A.Map(f);
308 }
309 
310 //____________________________________________________________________________
311 template <typename AReal>
312 void TCpu<AReal>::SquareElementWise(TCpuMatrix<AReal> &A)
313 {
314  auto f = [](AReal x) { return x * x; };
315  A.Map(f);
316 }
317 
318 //____________________________________________________________________________
319 template <typename AReal>
320 void TCpu<AReal>::SqrtElementWise(TCpuMatrix<AReal> &A)
321 {
322  auto f = [](AReal x) { return sqrt(x); };
323  A.Map(f);
324 }
325 
326 /// Adam updates
327 //____________________________________________________________________________
328 template<typename AReal>
329 void TCpu<AReal>::AdamUpdate(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & M, const TCpuMatrix<AReal> & V, AReal alpha, AReal eps)
330 {
331  // ADAM update the weights.
332  // Weight = Weight - alpha * M / (sqrt(V) + epsilon)
333  AReal * a = A.GetRawDataPointer();
334  const AReal * m = M.GetRawDataPointer();
335  const AReal * v = V.GetRawDataPointer();
336  for (size_t index = 0; index < A.GetNoElements() ; ++index) {
337  a[index] = a[index] - alpha * m[index]/( sqrt(v[index]) + eps);
338  }
339 }
340 
341 //____________________________________________________________________________
342 template<typename AReal>
343 void TCpu<AReal>::AdamUpdateFirstMom(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & B, AReal beta)
344 {
345  // First momentum weight gradient update for ADAM
346  // Mt = beta1 * Mt-1 + (1-beta1) * WeightGradients
347  AReal * a = A.GetRawDataPointer();
348  const AReal * b = B.GetRawDataPointer();
349  for (size_t index = 0; index < A.GetNoElements() ; ++index) {
350  a[index] = beta * a[index] + (1.-beta) * b[index];
351  }
352 }
353 //____________________________________________________________________________
354 template<typename AReal>
355 void TCpu<AReal>::AdamUpdateSecondMom(TCpuMatrix<AReal> &A, const TCpuMatrix<AReal> & B, AReal beta)
356 {
357  // Second momentum weight gradient update for ADAM
358  // Vt = beta2 * Vt-1 + (1-beta2) * WeightGradients^2
359  AReal * a = A.GetRawDataPointer();
360  const AReal * b = B.GetRawDataPointer();
361  for (size_t index = 0; index < A.GetNoElements() ; ++index) {
362  a[index] = beta * a[index] + (1.-beta) * b[index] * b[index];
363  }
364 }
365 
366 
367 } // DNN
368 } // TMVA