25 #pragma GCC diagnostic push
26 #pragma GCC diagnostic ignored "-Wshadow"
30 #pragma GCC diagnostic pop
// Multiplies A by B into C through the Blas::Gemm wrapper.
// m and k are A's row and column counts, n is B's column count.
// C must be m x n and B must have k rows; R__ASSERT checks all three.
38 template<
typename AReal>
39 void TCpu<AReal>::Multiply(TCpuMatrix<AReal> &C,
40 const TCpuMatrix<AReal> &A,
41 const TCpuMatrix<AReal> &B)
43 int m = (int) A.GetNrows();
44 int k = (int) A.GetNcols();
45 int n = (int) B.GetNcols();
// Shape compatibility checks on the output and on the inner dimension.
47 R__ASSERT((
int) C.GetNrows() == m);
48 R__ASSERT((
int) C.GetNcols() == n);
49 R__ASSERT((
int) B.GetNrows() == k);
// Raw element buffers that are handed to the BLAS wrapper.
59 const AReal * APointer = A.GetRawDataPointer();
60 const AReal * BPointer = B.GetRawDataPointer();
61 AReal * CPointer = C.GetRawDataPointer();
// NOTE(review): transa, transb, alpha and beta are not defined on any line of this listing.
// The leading dimensions passed are m (A), k (B) and m (C), i.e. the row counts;
// presumably the buffers are column-major - TODO confirm.
63 ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
64 APointer, &m, BPointer, &k, &beta, CPointer, &m);
// Scratch matrix with C's shape; presumably used by a non-BLAS fallback path - confirm.
66 TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
// Multiplies the transpose of A by B into C through the Blas::Gemm wrapper,
// with caller-supplied scalars alpha and beta forwarded to Gemm.
// Here m is A's column count and k is A's row count, so the asserted shapes are
// C: m x n and B: k x n.
73 template<
typename AReal>
74 void TCpu<AReal>::TransposeMultiply(TCpuMatrix<AReal> &C,
75 const TCpuMatrix<AReal> &A,
76 const TCpuMatrix<AReal> &B,
77 AReal alpha, AReal beta)
80 int m = (int) A.GetNcols();
81 int k = (int) A.GetNrows();
82 int n = (int) B.GetNcols();
// Shape compatibility checks on the output and on the inner dimension.
84 R__ASSERT((
int) C.GetNrows() == m);
85 R__ASSERT((
int) C.GetNcols() == n);
86 R__ASSERT((
int) B.GetNrows() == k);
// Raw element buffers that are handed to the BLAS wrapper.
94 const AReal *APointer = A.GetRawDataPointer();
95 const AReal *BPointer = B.GetRawDataPointer();
96 AReal *CPointer = C.GetRawDataPointer();
// NOTE(review): transa and transb are not defined on any line of this listing.
// The leading dimensions passed are k (A), k (B) and m (C).
98 ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
99 APointer, &k, BPointer, &k, &beta, CPointer, &m);
// Scratch matrix with C's shape; presumably used by a non-BLAS fallback path - confirm.
101 TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
// NOTE(review): beta is added here as a plain scalar to alpha*tmp, while the Gemm call
// above receives beta together with CPointer. If Gemm follows the standard BLAS
// convention (alpha*op(A)*op(B) + beta*C) the two paths disagree for beta != 0 - confirm.
103 tmp = alpha*tmp + beta;
// In-place element-wise product for matrices: B[i] *= A[i] over the raw buffers.
// A and B must hold the same number of elements (checked with R__ASSERT).
109 template<
typename AReal>
110 void TCpu<AReal>::Hadamard(TCpuMatrix<AReal> &B,
111 const TCpuMatrix<AReal> &A)
113 const AReal *dataA = A.GetRawDataPointer();
114 AReal *dataB = B.GetRawDataPointer();
116 size_t nElements = A.GetNoElements();
117 R__ASSERT(B.GetNoElements() == nElements);
// Chunk length used both by the worker below and as the stride of the dispatch loops.
118 size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
// Worker for one chunk: workerID is the index of the chunk's first element.
// It processes at most nSteps consecutive elements and stops at the end of the buffer.
120 auto f = [&](UInt_t workerID)
122 for (
size_t j = 0; j < nSteps; ++j) {
123 size_t idx = workerID+j;
124 if (idx >= nElements)
break;
125 dataB[idx] *= dataA[idx];
// More than one chunk is needed: hand f to B's thread executor together with a
// TSeqI built from (0, nElements, nSteps), presumably the chunk start offsets - confirm.
130 if (nSteps < nElements) {
132 B.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps));
// Loop over the same start offsets; its body is not part of this listing
// (presumably the serial path calling f(i) - confirm).
134 for (
size_t i = 0; i < nElements ; i+= nSteps)
// In-place element-wise product for tensors: B[i] *= A[i] over the raw buffers.
// A and B must hold the same number of elements (checked with R__ASSERT).
144 template<
typename AReal>
145 void TCpu<AReal>::Hadamard(TCpuTensor<AReal> &B,
146 const TCpuTensor<AReal> &A)
148 const AReal *dataA = A.GetRawDataPointer();
149 AReal *dataB = B.GetRawDataPointer();
151 size_t nElements = A.GetNoElements();
152 R__ASSERT(B.GetNoElements() == nElements);
// Chunk length used both by the worker below and as the stride of the dispatch loops.
153 size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
// Worker for one chunk: workerID is the index of the chunk's first element.
// It processes at most nSteps consecutive elements and stops at the end of the buffer.
155 auto f = [&](UInt_t workerID)
157 for (
size_t j = 0; j < nSteps; ++j) {
158 size_t idx = workerID+j;
159 if (idx >= nElements)
break;
160 dataB[idx] *= dataA[idx];
// More than one chunk is needed: hand f to the executor obtained from TMVA::Config
// together with a TSeqI built from (0, nElements, nSteps), presumably the chunk
// start offsets - confirm.
165 if (nSteps < nElements) {
167 TMVA::Config::Instance().GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps));
// Loop over the same start offsets; its body is not part of this listing
// (presumably the serial path calling f(i) - confirm).
169 for (
size_t i = 0; i < nElements ; i+= nSteps)
// Element-wise comparison of A and B with absolute tolerance epsilon.
// Calls Fatal() when the row or column counts differ, and returns false at the first
// pair of elements whose absolute difference exceeds epsilon.
// NOTE(review): the return taken when every element passes is not part of this
// listing - presumably `return true`; confirm.
186 template<
typename AReal>
187 bool TCpu<AReal>::AlmostEquals(
const TCpuMatrix<AReal> &A,
const TCpuMatrix<AReal> &B,
double epsilon)
// Shapes must match exactly before any element is compared.
189 if (A.GetNrows() != B.GetNrows() || A.GetNcols() != B.GetNcols()) {
190 Fatal(
"AlmostEquals",
"The passed matrices have unequal shapes.");
193 const AReal *dataA = A.GetRawDataPointer();
194 const AReal *dataB = B.GetRawDataPointer();
195 size_t nElements = A.GetNoElements();
// Linear scan over the raw buffers; a difference exactly equal to epsilon still passes.
197 for(
size_t i = 0; i < nElements; i++) {
198 if(fabs(dataA[i] - dataB[i]) > epsilon)
return false;
// Reduces A into B via Blas::Gemv when R__HAS_TMVACPU is defined, using the buffer
// returned by TCpuMatrix::GetOnePointer() as the vector operand (presumably all ones,
// so the product is a sum - confirm). alpha and beta are forwarded to Gemv.
204 template<
typename AReal>
205 void TCpu<AReal>::SumColumns(TCpuMatrix<AReal> &B,
206 const TCpuMatrix<AReal> &A,
207 AReal alpha, AReal beta)
209 #ifdef R__HAS_TMVACPU
210 int m = (int) A.GetNrows();
211 int n = (int) A.GetNcols();
// Raw element buffers that are handed to the BLAS wrapper.
218 const AReal * APointer = A.GetRawDataPointer();
219 AReal * BPointer = B.GetRawDataPointer();
// NOTE(review): trans and inc are not defined on any line of this listing.
// A is passed as m x n with leading dimension m.
221 ::TMVA::DNN::Blas::Gemv(&trans, &m, &n, &alpha, APointer, &m,
222 TCpuMatrix<AReal>::GetOnePointer(), &inc,
223 &beta, BPointer, &inc);
// Scratch matrix with B's shape, filled by the reference implementation;
// presumably the non-BLAS fallback path - confirm.
225 TMatrixT<AReal> tmp(B.GetNrows(), B.GetNcols());
226 TReference<AReal>::SumColumns(tmp,A);
// NOTE(review): beta is added here as a plain scalar, while the Gemv call above receives
// beta together with BPointer. If Gemv follows the standard BLAS convention
// (alpha*op(A)*x + beta*y) the two paths disagree for beta != 0 - confirm.
227 tmp = alpha*tmp + beta;
// Matrix ScaleAdd: passes A's buffer as x and B's buffer as y to Blas::Axpy with the
// scalar alpha when R__HAS_TMVACPU is defined (standard axpy would give B += alpha * A -
// confirm the wrapper follows that convention).
// NOTE(review): the remaining parameters of the signature (including alpha) are not
// part of this listing.
233 template<
typename AReal>
234 void TCpu<AReal>::ScaleAdd(TCpuMatrix<AReal> &B,
235 const TCpuMatrix<AReal> &A,
238 #ifdef R__HAS_TMVACPU
// Total element count of A, narrowed to int for the BLAS interface.
239 int n = (int) (A.GetNcols() * A.GetNrows());
242 const AReal *x = A.GetRawDataPointer();
243 AReal *y = B.GetRawDataPointer();
// NOTE(review): inc is not defined on any line of this listing.
245 ::TMVA::DNN::Blas::Axpy(&n, &alpha, x, &inc, y, &inc);
// Reference implementation applied to tmp; tmp's declaration is not part of this
// listing - presumably the non-BLAS fallback path, confirm.
248 TReference<AReal>::ScaleAdd(tmp, A, alpha);
// Matrix Copy: defines an identity function f on single elements.
// NOTE(review): the statement that uses f to transfer A's elements into B is not part
// of this listing.
254 template<
typename AReal>
255 void TCpu<AReal>::Copy(TCpuMatrix<AReal> &B,
256 const TCpuMatrix<AReal> &A)
// Identity mapping: each element is returned unchanged.
258 auto f = [](AReal x) {
return x;};
// Tensor ScaleAdd: applies the matrix ScaleAdd to each slice along the first dimension.
// NOTE(review): the remaining parameters of the signature (including alpha) are not
// part of this listing.
264 template<
typename AReal>
265 void TCpu<AReal>::ScaleAdd(TCpuTensor<AReal> &B,
266 const TCpuTensor<AReal> &A,
// One iteration per index of B's first dimension; A is indexed with the same i.
270 for (
size_t i = 0; i < B.GetFirstSize(); ++i) {
// B_m is a local TCpuMatrix object; for the update to reach B it presumably shares
// B's storage rather than copying it - confirm.
271 TCpuMatrix<AReal> B_m = B.At(i).GetMatrix();
272 ScaleAdd(B_m, A.At(i).GetMatrix(), alpha);
// Tensor Copy: defines an identity function f on single elements.
// NOTE(review): the statement that uses f to transfer A's elements into B is not part
// of this listing.
277 template<
typename AReal>
278 void TCpu<AReal>::Copy(TCpuTensor<AReal> &B,
279 const TCpuTensor<AReal> &A)
// Identity mapping: each element is returned unchanged.
282 auto f = [](AReal x) {
return x;};
// ConstAdd: defines f(x) = x + beta with beta captured by value.
// NOTE(review): the statement applying f to A is not part of this listing -
// presumably it is applied to every element of A; confirm.
287 template <
typename AReal>
288 void TCpu<AReal>::ConstAdd(TCpuMatrix<AReal> &A, AReal beta)
290 auto f = [beta](AReal x) {
return x + beta; };
// ConstMult: defines f(x) = x * beta with beta captured by value.
// NOTE(review): the statement applying f to A is not part of this listing -
// presumably it is applied to every element of A; confirm.
295 template <
typename AReal>
296 void TCpu<AReal>::ConstMult(TCpuMatrix<AReal> &A, AReal beta)
298 auto f = [beta](AReal x) {
return x * beta; };
// ReciprocalElementWise: defines f(x) = 1 / x; no guard against x == 0 is visible.
// NOTE(review): the statement applying f to A is not part of this listing -
// presumably it is applied to every element of A; confirm.
303 template <
typename AReal>
304 void TCpu<AReal>::ReciprocalElementWise(TCpuMatrix<AReal> &A)
// 1.0 is a double literal, so the division is carried out in double precision.
306 auto f = [](AReal x) {
return 1.0 / x; };
// SquareElementWise: defines f(x) = x * x.
// NOTE(review): the statement applying f to A is not part of this listing -
// presumably it is applied to every element of A; confirm.
311 template <
typename AReal>
312 void TCpu<AReal>::SquareElementWise(TCpuMatrix<AReal> &A)
314 auto f = [](AReal x) {
return x * x; };
// SqrtElementWise: defines f(x) = sqrt(x); no guard against negative x is visible.
// NOTE(review): the statement applying f to A is not part of this listing -
// presumably it is applied to every element of A; confirm.
319 template <
typename AReal>
320 void TCpu<AReal>::SqrtElementWise(TCpuMatrix<AReal> &A)
322 auto f = [](AReal x) {
return sqrt(x); };
// Adam parameter step, performed in place on A:
//    A[i] = A[i] - alpha * M[i] / (sqrt(V[i]) + eps)
// The loop bound is A's element count; M and V are read at the same indices and no
// size check on them is visible here.
328 template<
typename AReal>
329 void TCpu<AReal>::AdamUpdate(TCpuMatrix<AReal> &A,
const TCpuMatrix<AReal> & M,
const TCpuMatrix<AReal> & V, AReal alpha, AReal eps)
333 AReal * a = A.GetRawDataPointer();
334 const AReal * m = M.GetRawDataPointer();
335 const AReal * v = V.GetRawDataPointer();
336 for (
size_t index = 0; index < A.GetNoElements() ; ++index) {
// eps is added after the square root, inside the denominator.
337 a[index] = a[index] - alpha * m[index]/( sqrt(v[index]) + eps);
// First-moment update, performed in place on A:
//    A[i] = beta * A[i] + (1 - beta) * B[i]
// The loop bound is A's element count; B is read at the same indices and no size
// check on it is visible here.
342 template<
typename AReal>
343 void TCpu<AReal>::AdamUpdateFirstMom(TCpuMatrix<AReal> &A,
const TCpuMatrix<AReal> & B, AReal beta)
347 AReal * a = A.GetRawDataPointer();
348 const AReal * b = B.GetRawDataPointer();
349 for (
size_t index = 0; index < A.GetNoElements() ; ++index) {
// `1.` is a double literal, so (1.-beta) is evaluated in double precision.
350 a[index] = beta * a[index] + (1.-beta) * b[index];
// Second-moment update, performed in place on A:
//    A[i] = beta * A[i] + (1 - beta) * B[i] * B[i]
// The loop bound is A's element count; B is read at the same indices and no size
// check on it is visible here.
354 template<
typename AReal>
355 void TCpu<AReal>::AdamUpdateSecondMom(TCpuMatrix<AReal> &A,
const TCpuMatrix<AReal> & B, AReal beta)
359 AReal * a = A.GetRawDataPointer();
360 const AReal * b = B.GetRawDataPointer();
361 for (
size_t index = 0; index < A.GetNoElements() ; ++index) {
// `1.` is a double literal, so (1.-beta) is evaluated in double precision.
362 a[index] = beta * a[index] + (1.-beta) * b[index] * b[index];