25 #pragma GCC diagnostic push 
   26 #pragma GCC diagnostic ignored "-Wshadow" 
   30 #pragma GCC diagnostic pop 
   /// Fill C from the matrices A and B through ::TMVA::DNN::Blas::Gemm.
   ///
   /// \param[out] C  Result matrix; asserted to have A.GetNrows() rows and
   ///                B.GetNcols() columns.
   /// \param[in]  A  Left operand (m rows, k columns).
   /// \param[in]  B  Right operand; asserted to have k rows.
   ///
   /// NOTE(review): this listing is missing several original lines (the
   /// function braces, the preprocessor guards and the declarations of
   /// transa, transb, alpha and beta used in the Gemm call), so the exact
   /// operation flags and scaling factors cannot be confirmed from here.
   38 template<
typename AReal>
 
   39 void TCpu<AReal>::Multiply(TCpuMatrix<AReal> &C,
 
   40                             const TCpuMatrix<AReal> &A,
 
   41                             const TCpuMatrix<AReal> &B)
 
          // Dimensions are narrowed to int because they are handed to Gemm by
          // pointer-to-int below.
   43     int m = (int) A.GetNrows();
 
   44     int k = (int) A.GetNcols();
 
   45     int n = (int) B.GetNcols();
 
          // Shape contract: C is m x n and the inner dimensions of A and B agree.
   47     R__ASSERT((
int) C.GetNrows() == m);
 
   48     R__ASSERT((
int) C.GetNcols() == n);
 
   49     R__ASSERT((
int) B.GetNrows() == k);
 
   59     const AReal * APointer = A.GetRawDataPointer();
 
   60     const AReal * BPointer = B.GetRawDataPointer();
 
   61           AReal * CPointer = C.GetRawDataPointer();
 
          // A and C are passed together with &m, B together with &k; in the
          // standard GEMM argument order these would be the leading dimensions
          // -- TODO confirm against the Blas::Gemm wrapper.
   63     ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
 
   64                             APointer, &m, BPointer, &k, &beta, CPointer, &m);
 
         // NOTE(review): presumably the start of a non-BLAS fallback path; the
         // rest of that branch is not visible in this listing.
   66    TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
 
   /// Fill C from A and B through ::TMVA::DNN::Blas::Gemm, with the shape
   /// checks written for the transpose of A (C has A.GetNcols() rows).
   ///
   /// \param[out] C      Result matrix; asserted to be A.GetNcols() x B.GetNcols().
   /// \param[in]  A      Left operand; its row count is the inner dimension k.
   /// \param[in]  B      Right operand; asserted to have k rows.
   /// \param[in]  alpha  Scalar forwarded to Gemm.
   /// \param[in]  beta   Scalar forwarded to Gemm.
   ///
   /// NOTE(review): the declarations of transa/transb, the function braces
   /// and the preprocessor guards are not visible in this listing.
   73 template<
typename AReal>
 
   74 void TCpu<AReal>::TransposeMultiply(TCpuMatrix<AReal> &C,
 
   75                                      const TCpuMatrix<AReal> &A,
 
   76                                      const TCpuMatrix<AReal> &B,
 
   77                                      AReal alpha, AReal beta)
 
          // Note the swapped roles compared with a plain product: m is the
          // column count of A and k its row count.
   80     int m = (int) A.GetNcols();
 
   81     int k = (int) A.GetNrows();
 
   82     int n = (int) B.GetNcols();
 
   84     R__ASSERT((
int) C.GetNrows() == m);
 
   85     R__ASSERT((
int) C.GetNcols() == n);
 
   86     R__ASSERT((
int) B.GetNrows() == k);
 
   94     const AReal *APointer = A.GetRawDataPointer();
 
   95     const AReal *BPointer = B.GetRawDataPointer();
 
   96           AReal *CPointer = C.GetRawDataPointer();
 
          // Both A and B are passed together with &k, C with &m.
   98     ::TMVA::DNN::Blas::Gemm(&transa, &transb, &m, &n, &k, &alpha,
 
   99                             APointer, &k, BPointer, &k, &beta, CPointer, &m);
 
         // NOTE(review): presumably the non-BLAS fallback; the statement that
         // fills tmp is not visible in this listing.
  101    TMatrixT<AReal> tmp(C.GetNrows(), C.GetNcols());
 
         // NOTE(review): this looks like it adds the scalar beta to tmp rather
         // than beta times the previous content of C. If Blas::Gemm follows the
         // standard GEMM contract (C = alpha*op(A)*op(B) + beta*C) the two
         // paths disagree whenever beta != 0 -- confirm.
  103    tmp = alpha*tmp + beta;
 
  /// In-place element-wise product on the raw buffers: B[i] *= A[i].
  ///
  /// \param[in,out] B  Matrix that is scaled element by element.
  /// \param[in]     A  Matrix of factors; asserted to have as many elements as B.
  ///
  /// The work is cut into chunks of nSteps elements, where nSteps comes from
  /// TCpuMatrix<AReal>::GetNWorkItems(nElements).
  ///
  /// NOTE(review): the braces, any preprocessor guards and the branch taken
  /// when nSteps >= nElements are not visible in this listing.
  109 template<
typename AReal>
 
  110 void TCpu<AReal>::Hadamard(TCpuMatrix<AReal> &B,
 
  111                             const TCpuMatrix<AReal> &A)
 
  113    const AReal *dataA      = A.GetRawDataPointer();
 
  114    AReal *dataB      = B.GetRawDataPointer();
 
  116    size_t nElements =  A.GetNoElements();
 
  117    R__ASSERT(B.GetNoElements() == nElements);
 
  118    size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
 
         // Chunk worker: workerID is the index of the first element of the
         // chunk; at most nSteps consecutive elements are processed.
  120    auto f = [&](UInt_t workerID)
 
  122       for (
size_t j = 0; j < nSteps; ++j) {
 
  123          size_t idx = workerID+j;
 
               // The last chunk may be shorter than nSteps.
  124          if (idx >= nElements) 
break;
 
  125          dataB[idx] *= dataA[idx];
 
  130    if (nSteps < nElements) {
 
            // Chunk start offsets 0, nSteps, 2*nSteps, ... are handed to the
            // executor obtained from B.
            // NOTE(review): nElements/nSteps are size_t while workerID is
            // UInt_t and TSeqI appears to be an int sequence -- confirm no
            // truncation for very large matrices.
  132       B.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps));
 
            // NOTE(review): presumably an alternative serial dispatch over the
            // same chunk offsets; the lines separating it from the Foreach
            // call and its loop body are not visible here.
  134       for (
size_t i = 0;  i < nElements ; i+= nSteps)
 
  /// In-place element-wise product on the raw tensor buffers: B[i] *= A[i].
  ///
  /// \param[in,out] B  Tensor that is scaled element by element.
  /// \param[in]     A  Tensor of factors; asserted to have as many elements as B.
  ///
  /// The work is cut into chunks of nSteps elements, where nSteps comes from
  /// TCpuMatrix<AReal>::GetNWorkItems(nElements).
  ///
  /// NOTE(review): the braces, any preprocessor guards and the branch taken
  /// when nSteps >= nElements are not visible in this listing.
  144 template<
typename AReal>
 
  145 void TCpu<AReal>::Hadamard(TCpuTensor<AReal> &B,
 
  146                             const TCpuTensor<AReal> &A)
 
  148    const AReal *dataA      = A.GetRawDataPointer();
 
  149    AReal *dataB      = B.GetRawDataPointer();
 
  151    size_t nElements =  A.GetNoElements();
 
  152    R__ASSERT(B.GetNoElements() == nElements);
 
  153    size_t nSteps = TCpuMatrix<AReal>::GetNWorkItems(nElements);
 
         // Chunk worker: workerID is the index of the first element of the
         // chunk; at most nSteps consecutive elements are processed.
  155    auto f = [&](UInt_t workerID)
 
  157       for (
size_t j = 0; j < nSteps; ++j) {
 
  158          size_t idx = workerID+j;
 
               // The last chunk may be shorter than nSteps.
  159          if (idx >= nElements) 
break;
 
  160          dataB[idx] *= dataA[idx];
 
  165    if (nSteps < nElements) {
 
            // Here the executor comes from TMVA::Config::Instance(), not from
            // the tensor itself.
            // NOTE(review): nElements/nSteps are size_t while workerID is
            // UInt_t and TSeqI appears to be an int sequence -- confirm no
            // truncation for very large tensors.
  167       TMVA::Config::Instance().GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps));
 
            // NOTE(review): presumably an alternative serial dispatch over the
            // same chunk offsets; the lines separating it from the Foreach
            // call and its loop body are not visible here.
  169       for (
size_t i = 0;  i < nElements ; i+= nSteps)
 
  /// Element-wise comparison of two matrices with an absolute tolerance.
  ///
  /// \param[in] A        First matrix.
  /// \param[in] B        Second matrix.
  /// \param[in] epsilon  Largest absolute difference that is still accepted.
  /// \return false as soon as one pair of elements differs by more than
  ///         epsilon. The return statement reached when every element passes
  ///         is not visible in this listing.
  ///
  /// Matrices with different row or column counts are reported through Fatal().
  186 template<
typename AReal>
 
  187 bool TCpu<AReal>::AlmostEquals(
const TCpuMatrix<AReal> &A, 
const TCpuMatrix<AReal> &B, 
double epsilon)
 
  189     if (A.GetNrows() != B.GetNrows() || A.GetNcols() != B.GetNcols()) {
 
  190         Fatal(
"AlmostEquals", 
"The passed matrices have unequal shapes.");
 
  193     const AReal *dataA = A.GetRawDataPointer();
 
  194     const AReal *dataB = B.GetRawDataPointer();
 
  195     size_t nElements =  A.GetNoElements();
 
  197     for(
size_t i = 0; i < nElements; i++) {
 
          // For floating-point AReal a NaN difference never satisfies
          // `> epsilon`, so NaN entries do not trigger the early return.
  198         if(fabs(dataA[i] - dataB[i]) > epsilon) 
return false;
 
  /// Reduce A into B, either through ::TMVA::DNN::Blas::Gemv (when
  /// R__HAS_TMVACPU is defined) or through TReference<AReal>::SumColumns.
  ///
  /// \param[out] B      Destination matrix.
  /// \param[in]  A      Source matrix (m rows, n columns).
  /// \param[in]  alpha  Scalar forwarded to Gemv / applied to the fallback result.
  /// \param[in]  beta   Scalar forwarded to Gemv / used in the fallback expression.
  ///
  /// NOTE(review): the declarations of trans and inc, the #else/#endif lines
  /// and the final assignment to B are not visible in this listing.
  204 template<
typename AReal>
 
  205 void TCpu<AReal>::SumColumns(TCpuMatrix<AReal> &B,
 
  206                               const TCpuMatrix<AReal> &A,
 
  207                               AReal alpha, AReal beta)
 
  209 #ifdef R__HAS_TMVACPU 
  210    int m = (int) A.GetNrows();
 
  211    int n = (int) A.GetNcols();
 
  218    const AReal * APointer = A.GetRawDataPointer();
 
  219          AReal * BPointer = B.GetRawDataPointer();
 
         // The vector operand is the buffer returned by
         // TCpuMatrix<AReal>::GetOnePointer() -- presumably a vector of ones,
         // which turns the matrix-vector product into a sum; TODO confirm.
  221    ::TMVA::DNN::Blas::Gemv(&trans, &m, &n, &alpha, APointer, &m,
 
  222                            TCpuMatrix<AReal>::GetOnePointer(), &inc,
 
  223                            &beta, BPointer, &inc);
 
         // NOTE(review): presumably the non-BLAS fallback branch.
  225    TMatrixT<AReal> tmp(B.GetNrows(), B.GetNcols());
 
  226    TReference<AReal>::SumColumns(tmp,A);
 
         // NOTE(review): this looks like it adds the scalar beta to every
         // entry instead of beta times the previous content of B; if Gemv
         // follows the standard contract (y = alpha*op(A)*x + beta*y) the two
         // paths disagree whenever beta != 0 -- confirm.
  227    tmp = alpha*tmp + beta;
 
  /// Update B from A and the scalar alpha, either through
  /// ::TMVA::DNN::Blas::Axpy on the raw buffers (when R__HAS_TMVACPU is
  /// defined) or through TReference<AReal>::ScaleAdd.
  ///
  /// \param[in,out] B  Matrix that is updated (passed to Axpy as y).
  /// \param[in]     A  Matrix that is passed to Axpy as x.
  ///
  /// NOTE(review): the line declaring the remaining parameter (used below as
  /// `alpha`), the declarations of inc and tmp, and the #else/#endif lines
  /// are not visible in this listing.
  233 template<
typename AReal>
 
  234 void TCpu<AReal>::ScaleAdd(TCpuMatrix<AReal> &B,
 
  235                             const TCpuMatrix<AReal> &A,
 
  238 #ifdef R__HAS_TMVACPU 
         // Total element count of A, narrowed to int for the Axpy interface.
  239    int n = (int) (A.GetNcols() * A.GetNrows());
 
  242    const AReal *x = A.GetRawDataPointer();
 
  243    AReal *y = B.GetRawDataPointer();
 
         // Standard AXPY would compute y = alpha*x + y -- TODO confirm that the
         // Blas::Axpy wrapper follows that contract.
  245    ::TMVA::DNN::Blas::Axpy(&n, &alpha, x, &inc, y, &inc);
 
         // NOTE(review): presumably the non-BLAS fallback branch; tmp is set up
         // on a line that is not visible here.
  248    TReference<AReal>::ScaleAdd(tmp, A, alpha);
 
  /// Copy between two matrices, expressed with an identity element function.
  ///
  /// \param[out] B  Destination matrix.
  /// \param[in]  A  Source matrix.
  ///
  /// NOTE(review): only the definition of the identity lambda is visible in
  /// this listing; the statement that applies it to A and B is not shown.
  254 template<
typename AReal>
 
  255 void TCpu<AReal>::Copy(TCpuMatrix<AReal> &B,
 
  256                         const TCpuMatrix<AReal> &A)
 
         // Identity map: every element is returned unchanged.
  258    auto f = [](AReal x) {
return x;};
 
  /// Tensor overload of ScaleAdd: applies the matrix overload slice by slice
  /// along the first tensor dimension.
  ///
  /// \param[in,out] B  Tensor whose slices are updated.
  /// \param[in]     A  Tensor whose slices are the inputs.
  ///
  /// NOTE(review): the line declaring the remaining parameter (used below as
  /// `alpha`) and the function braces are not visible in this listing.
  264 template<
typename AReal>
 
  265 void TCpu<AReal>::ScaleAdd(TCpuTensor<AReal> &B,
 
  266                             const TCpuTensor<AReal> &A,
 
  270    for (
size_t i = 0; i < B.GetFirstSize(); ++i) {
 
            // B_m is a local TCpuMatrix built from slice i; presumably it
            // shares storage with B so the update is visible in the tensor --
            // TODO confirm.
  271       TCpuMatrix<AReal> B_m = B.At(i).GetMatrix();
 
  272       ScaleAdd(B_m, A.At(i).GetMatrix(), alpha);
 
  /// Copy between two tensors, expressed with an identity element function.
  ///
  /// \param[out] B  Destination tensor.
  /// \param[in]  A  Source tensor.
  ///
  /// NOTE(review): only the definition of the identity lambda is visible in
  /// this listing; the statement that applies it to A and B is not shown.
  277 template<
typename AReal>
 
  278 void TCpu<AReal>::Copy(TCpuTensor<AReal> &B,
 
  279                             const TCpuTensor<AReal> &A)
 
         // Identity map: every element is returned unchanged.
  282    auto f = [](AReal x) {
return x;};
 
  /// Add the constant beta to matrix elements.
  ///
  /// \param[in,out] A     Matrix to modify.
  /// \param[in]     beta  Constant added to each element by the lambda.
  ///
  /// NOTE(review): the statement that applies the lambda to A is not visible
  /// in this listing.
  287 template <
typename AReal>
 
  288 void TCpu<AReal>::ConstAdd(TCpuMatrix<AReal> &A, AReal beta)
 
         // beta is captured by value.
  290    auto f = [beta](AReal x) { 
return x + beta; };
 
  /// Multiply matrix elements by the constant beta.
  ///
  /// \param[in,out] A     Matrix to modify.
  /// \param[in]     beta  Factor applied to each element by the lambda.
  ///
  /// NOTE(review): the statement that applies the lambda to A is not visible
  /// in this listing.
  295 template <
typename AReal>
 
  296 void TCpu<AReal>::ConstMult(TCpuMatrix<AReal> &A, AReal beta)
 
         // beta is captured by value.
  298    auto f = [beta](AReal x) { 
return x * beta; };
 
  /// Replace matrix elements by their reciprocal.
  ///
  /// \param[in,out] A  Matrix to modify.
  ///
  /// NOTE(review): the statement that applies the lambda to A is not visible
  /// in this listing. No guard against zero elements is visible either.
  303 template <
typename AReal>
 
  304 void TCpu<AReal>::ReciprocalElementWise(TCpuMatrix<AReal> &A)
 
         // `1.0` is a double literal, so the lambda's deduced return type is
         // double even when AReal is float.
  306    auto f = [](AReal x) { 
return 1.0 / x; };
 
  /// Replace matrix elements by their square.
  ///
  /// \param[in,out] A  Matrix to modify.
  ///
  /// NOTE(review): the statement that applies the lambda to A is not visible
  /// in this listing.
  311 template <
typename AReal>
 
  312 void TCpu<AReal>::SquareElementWise(TCpuMatrix<AReal> &A)
 
  314    auto f = [](AReal x) { 
return x * x; };
 
  /// Replace matrix elements by their square root.
  ///
  /// \param[in,out] A  Matrix to modify.
  ///
  /// NOTE(review): the statement that applies the lambda to A is not visible
  /// in this listing. No guard against negative elements is visible either.
  319 template <
typename AReal>
 
  320 void TCpu<AReal>::SqrtElementWise(TCpuMatrix<AReal> &A)
 
  322    auto f = [](AReal x) { 
return sqrt(x); };
 
  /// In-place update of A on the raw buffers:
  ///   A[i] = A[i] - alpha * M[i] / (sqrt(V[i]) + eps)
  ///
  /// \param[in,out] A      Matrix that is updated.
  /// \param[in]     M      Matrix supplying the numerator term.
  /// \param[in]     V      Matrix supplying the term under the square root.
  /// \param[in]     alpha  Scale factor of the update.
  /// \param[in]     eps    Constant added to the denominator.
  ///
  /// NOTE(review): the loop bound is A.GetNoElements(); no check that M and V
  /// hold at least as many elements is visible in this listing.
  328 template<
typename AReal>
 
  329 void TCpu<AReal>::AdamUpdate(TCpuMatrix<AReal> &A, 
const TCpuMatrix<AReal> & M, 
const TCpuMatrix<AReal> & V, AReal alpha, AReal eps)
 
  333    AReal * a = A.GetRawDataPointer();
 
  334    const AReal * m = M.GetRawDataPointer();
 
  335    const AReal * v = V.GetRawDataPointer();
 
  336    for (
size_t index = 0; index < A.GetNoElements() ; ++index) {
 
  337       a[index] = a[index] - alpha * m[index]/( sqrt(v[index]) + eps);
 
  /// In-place blend of A with B on the raw buffers:
  ///   A[i] = beta * A[i] + (1 - beta) * B[i]
  ///
  /// \param[in,out] A     Matrix that is updated.
  /// \param[in]     B     Matrix blended into A.
  /// \param[in]     beta  Weight kept on the previous value of A.
  ///
  /// NOTE(review): the loop bound is A.GetNoElements(); no check that B holds
  /// at least as many elements is visible in this listing.
  342 template<
typename AReal>
 
  343 void TCpu<AReal>::AdamUpdateFirstMom(TCpuMatrix<AReal> &A, 
const TCpuMatrix<AReal> & B, AReal beta)
 
  347    AReal * a = A.GetRawDataPointer();
 
  348    const AReal * b = B.GetRawDataPointer();
 
  349    for (
size_t index = 0; index < A.GetNoElements() ; ++index) {
 
  350       a[index] = beta * a[index] + (1.-beta) * b[index];
 
  /// In-place blend of A with the element-wise square of B on the raw buffers:
  ///   A[i] = beta * A[i] + (1 - beta) * B[i] * B[i]
  ///
  /// \param[in,out] A     Matrix that is updated.
  /// \param[in]     B     Matrix whose squared elements are blended into A.
  /// \param[in]     beta  Weight kept on the previous value of A.
  ///
  /// NOTE(review): the loop bound is A.GetNoElements(); no check that B holds
  /// at least as many elements is visible in this listing.
  354 template<
typename AReal>
 
  355 void TCpu<AReal>::AdamUpdateSecondMom(TCpuMatrix<AReal> &A, 
const TCpuMatrix<AReal> & B, AReal beta)
 
  359    AReal * a = A.GetRawDataPointer();
 
  360    const AReal * b = B.GetRawDataPointer();
 
  361    for (
size_t index = 0; index < A.GetNoElements() ; ++index) {
 
  362       a[index] = beta * a[index] + (1.-beta) * b[index] * b[index];