18 #ifndef TMVA_DNN_ARCHITECTURES_CUDNN 
   19 #define TMVA_DNN_ARCHITECTURES_CUDNN 
   21 #include "RConfigure.h"    
   24 #error This file can be compiled only when cudnn is available in ROOT 
   50 struct TCudnnEmptyDescriptor {};
 
   60 template<
typename AFloat = Float_t>
 
   64    static TRandom * fgRandomGen;
 
   67    using Scalar_t       = AFloat;
 
   68    using Matrix_t       = TCudaTensor<AFloat>;
 
   69    using Tensor_t       = TCudaTensor<AFloat>;
 
   70    using DeviceBuffer_t = TCudaDeviceBuffer<AFloat>;
 
   71    using HostBuffer_t   = TCudaHostBuffer<AFloat>;
 
   74    using ActivationDescriptor_t  = cudnnActivationDescriptor_t;
 
   75    using ConvolutionDescriptor_t = cudnnConvolutionDescriptor_t;
 
   76    using DropoutDescriptor_t     = cudnnDropoutDescriptor_t;
 
   77    using FilterDescriptor_t      = cudnnFilterDescriptor_t;
 
   79    using PoolingDescriptor_t     = cudnnPoolingDescriptor_t;
 
   81    using AlgorithmForward_t      = cudnnConvolutionFwdAlgo_t;
 
   82    using AlgorithmBackward_t     = cudnnConvolutionBwdDataAlgo_t;
 
   83    using AlgorithmHelper_t       = cudnnConvolutionBwdFilterAlgo_t;
 
   84    using AlgorithmDataType_t     = cudnnDataType_t;
 
   85    using ReduceTensorDescriptor_t = cudnnReduceTensorDescriptor_t;
 
   86    using TensorDescriptor_t       = cudnnTensorDescriptor_t;
 
   88    using EmptyDescriptor_t       = TCudnnEmptyDescriptor;        
 
   90    using BNormLayer_t            = TBatchNormLayer<TCudnn<AFloat>>;
 
   91    using BNormDescriptors_t      = TDNNGenDescriptors<BNormLayer_t>;
 
   93    using ConvLayer_t             = CNN::TConvLayer<TCudnn<AFloat>>;
 
   94    using ConvDescriptors_t       = CNN::TCNNDescriptors<ConvLayer_t>;
 
   95    using ConvWorkspace_t         = CNN::TCNNWorkspace<ConvLayer_t>;
 
   96    using PoolingLayer_t          = CNN::TMaxPoolLayer<TCudnn<AFloat>>;
 
   97    using PoolingDescriptors_t    = CNN::TCNNDescriptors<PoolingLayer_t>;
 
   98    using PoolingWorkspace_t      = CNN::TCNNWorkspace<PoolingLayer_t>;
 
  107       static int ConvFwdAlgorithm;
 
  108       static int ConvBwdDataAlgorithm;
 
  109       static int ConvBwdFilterAlgorithm;
 
  111       static Long_t ConvMaxWorkspaceSize;
 
  114    static TMVA::Experimental::MemoryLayout GetTensorLayout() { 
return TMVA::Experimental::MemoryLayout::RowMajor; }
 
  117    static Tensor_t CreateTensor(
size_t n, 
size_t c, 
size_t h, 
size_t w) {
 
  118       return Tensor_t( {n,c,h,w}, GetTensorLayout(), 0, 0);
 
  121    static Tensor_t CreateTensor(DeviceBuffer_t buffer, 
size_t n, 
size_t c, 
size_t h, 
size_t w) {
 
  122       return Tensor_t( buffer, {n,c,h,w}, GetTensorLayout(), 0, 0);
 
  127    static void  CreateWeightTensors( std::vector<Matrix_t> & newWeights, 
const std::vector<Matrix_t> & weights) {
 
  128       if (!newWeights.empty()) newWeights.clear();
 
  129       size_t n =  weights.size();
 
  130       for (
size_t i = 0; i < n; ++i)
 
  131          newWeights.emplace_back( weights[i].GetShape(), weights[i].GetLayout(), 0, 0);
 
  138    static void InitializeBNormDescriptors(TDescriptors * & descriptors,
 
  139                                           BNormLayer_t *L = 
nullptr);
 
  141    static void InitializeConvDescriptors(TDescriptors * & descriptors,
 
  142                                          ConvLayer_t *L = 
nullptr);
 
  144    static void InitializePoolDescriptors(TDescriptors * & descriptors,
 
  145                                         PoolingLayer_t *L = 
nullptr);
 
  147    static void InitializeActivationDescriptor(ActivationDescriptor_t & descriptors, EActivationFunction activFunc, 
double coef = 0.0);
 
  149    static void ReleaseConvDescriptors(TDescriptors    * descriptors );
 
  150    static void ReleasePoolDescriptors(TDescriptors * descriptors );
 
  151    static void ReleaseBNormDescriptors(TDescriptors * descriptors );
 
  152    static void ReleaseDescriptor(EmptyDescriptor_t       & emptyDescr) {}        
 
  153    static void ReleaseDescriptor(ActivationDescriptor_t  & activationDescr);
 
  154    static void ReleaseDescriptor(ConvolutionDescriptor_t & convolutionDescr);
 
  155    static void ReleaseDescriptor(DropoutDescriptor_t     & dropoutDescr);
 
  156    static void ReleaseDescriptor(FilterDescriptor_t      & filterDescr);
 
  157    static void ReleaseDescriptor(PoolingDescriptor_t     & poolingDescr);
 
  158    static void ReleaseDescriptor(TensorDescriptor_t      & tensorDescr);
 
  161    static void InitializeConvWorkspace(TWorkspace * & workspace,
 
  162                                        TDescriptors * & descriptors,
 
  163                                        const DNN::CNN::TConvParams & params,
 
  164                                        ConvLayer_t *L = 
nullptr);
 
  165    static void InitializePoolDropoutWorkspace(TWorkspace * & workspace,
 
  166                                        TDescriptors * & descriptors,
 
  167                                        const DNN::CNN::TConvParams & params,
 
  168                                        PoolingLayer_t *L = 
nullptr);
 
  170    static void FreeConvWorkspace(TWorkspace * workspace, ConvLayer_t *L = 
nullptr);
 
  171    static void FreePoolDropoutWorkspace(TWorkspace * workspace, PoolingLayer_t *L = 
nullptr);
 
  184    static void MultiplyTranspose(Tensor_t &output, 
const Tensor_t &input, 
const Matrix_t &weights);
 
  187    static void AddRowWise(Tensor_t &output,
const Matrix_t &biases);
 
  202    static void Backward(Tensor_t & activationGradientsBackward,
 
  203                         Matrix_t & weightGradients,
 
  204                         Matrix_t & biasGradients,
 
  206                         const Tensor_t & activationGradients,
 
  207                         const Matrix_t & weights,
 
  208                         const Tensor_t & activationBackward);
 
  211    static void ScaleAdd(Tensor_t & A, 
const Tensor_t & B,
 
  212                         Scalar_t alpha = 1.0,
 
  213                         Scalar_t beta = 1.0);
 
  216    static void Copy(Tensor_t & A, 
const Tensor_t & B);
 
  219    template<
typename ATensor_t>
 
  220    static void CopyDiffArch(Tensor_t & A,
 
  221                             const ATensor_t & B);
 
  223    template <
typename ATensor_t>
 
  224    static void CopyWeightsDiffArch(Tensor_t &A, 
const ATensor_t &B);
 
  227    static void CopyDiffArch(Tensor_t A, 
const Tensor_t & B ) { Copy(A,B); }
 
  230    template<
typename AMatrix_t>
 
  231    static void CopyDiffArch(std::vector<Tensor_t>  & A,
 
  232                             const std::vector<AMatrix_t> & B);
 
  247    static void Identity(Tensor_t & X) {}
 
  248    static void IdentityDerivative(Tensor_t & dX, Tensor_t& X,
 
  249                                   Tensor_t & Y,  Tensor_t & dY,
 
  250                                   ActivationDescriptor_t activationDescr,
 
  251                                   const AFloat alpha = 1,
 
  252                                   const AFloat beta = 1) {}
 
  254    static void ActivationFunctionForward(Tensor_t & X, EActivationFunction activFunct,
 
  255                           const ActivationDescriptor_t activationDescr,
 
  256                           const double coef = 0.0, 
const AFloat alpha = 1,
 
  257                           const AFloat beta = 0);
 
  260    static void ActivationFunctionForward(Tensor_t &Y, 
const Tensor_t & X, EActivationFunction activFunct,
 
  261                                          const ActivationDescriptor_t activationDescr, 
const double coef = 0.0,
 
  262                                          const AFloat alpha = 1, 
const AFloat beta = 0);
 
  265    static void ActivationFunctionBackward(Tensor_t & dX, 
const Tensor_t & Y,
 
  266                                           const Tensor_t & dY,  
const Tensor_t & X,
 
  267                                           EActivationFunction activFunct,
 
  268                                           const ActivationDescriptor_t activationDescr,
 
  269                                           const AFloat alpha = 1,
 
  270                                           const AFloat beta = 0);
 
  276    static void SymmetricReluDerivative(Tensor_t & B,
 
  277                                        const Tensor_t & A) {}
 
  280    static void SoftSignDerivative(Tensor_t & B,
 
  281                                   const Tensor_t & A) {}
 
  284    static void GaussDerivative(Tensor_t & B,
 
  285                                const Tensor_t & A) {}
 
  302    static Scalar_t MeanSquaredError(
const Matrix_t &Y, 
const Matrix_t &output,
 
  303                                     const Matrix_t &weights);
 
  304    static void MeanSquaredErrorGradients(Matrix_t &dY, 
const Matrix_t &Y,
 
  305                                          const Matrix_t &output, 
const Matrix_t &weights);
 
  309    static Scalar_t CrossEntropy(
const Matrix_t &Y, 
const Matrix_t &output,
 
  310                                 const Matrix_t &weights);
 
  312    static void CrossEntropyGradients(Matrix_t &dY, 
const Matrix_t &Y,
 
  313                                      const Matrix_t &output, 
const Matrix_t &weights);
 
  317    static Scalar_t SoftmaxCrossEntropy(
const Matrix_t &Y, 
const Matrix_t &output,
 
  318                                        const Matrix_t &weights);
 
  319    static void SoftmaxCrossEntropyGradients(Matrix_t &dY, 
const Matrix_t &Y,
 
  320                                             const Matrix_t &output, 
const Matrix_t &weights);
 
  336    static void Sigmoid(Matrix_t &YHat,
 
  338    static void Softmax(Matrix_t &YHat,
 
  355    static void DropoutForward(Tensor_t & A,
 
  356                               TDescriptors * descriptors,
 
  357                               TWorkspace         * workspace,
 
  360    static void DropoutBackward(Tensor_t & A,
 
  361                                TDescriptors * descriptors,
 
  362                                TWorkspace   * workspace);
 
  380    static void BatchNormLayerForwardTraining(
int axis, 
const Tensor_t &x, Tensor_t &y, Matrix_t &gamma, Matrix_t &beta,
 
  381                                              Matrix_t &mean, Matrix_t &, Matrix_t &iVariance, Matrix_t &runningMeans,
 
  382                                              Matrix_t &runningVars, Scalar_t nTrainedBatches, Scalar_t momentum,
 
  383                                              Scalar_t epsilon, 
const TensorDescriptor_t &bnParDescriptor);
 
  388    static void BatchNormLayerForwardInference(
int axis, 
const Tensor_t &x, Matrix_t &gamma, Matrix_t &beta,
 
  389                                               Tensor_t &y, 
const Matrix_t &runningMeans,
 
  390                                               const Matrix_t &runningVars, Scalar_t epsilon,
 
  391                                               const TensorDescriptor_t &);
 
  393    static void BatchNormLayerBackward(
int axis, 
const Tensor_t &x, 
const Tensor_t &dy, Tensor_t &dx,
 
  395                                       Matrix_t &dgamma, Matrix_t &dbeta, 
const Matrix_t &mean, 
const Matrix_t &variance,
 
  396                                       const Matrix_t &iVariance, Scalar_t epsilon, 
const TensorDescriptor_t &);
 
  411    static Scalar_t L1Regularization(
const Matrix_t &W)
 
  413       TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
 
  414       return TCuda<AFloat>::L1Regularization(mW);
 
  416    static void AddL1RegularizationGradients(Matrix_t &A, 
const Matrix_t &W, Scalar_t weightDecay)
 
  418       TCudaMatrix<AFloat> mA(A.GetDeviceBuffer(), A.GetSize(), 1);
 
  419       TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
 
  420       return TCuda<AFloat>::AddL1RegularizationGradients(mA, mW, weightDecay);
 
  423    static Scalar_t L2Regularization(
const Matrix_t &W)
 
  425       TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
 
  426       return TCuda<AFloat>::L2Regularization(mW);
 
  428    static void AddL2RegularizationGradients(Matrix_t &A, 
const Matrix_t &W, Scalar_t weightDecay)
 
  430       TCudaMatrix<AFloat> mA(A.GetDeviceBuffer(), A.GetSize(), 1);
 
  431       TCudaMatrix<AFloat> mW(W.GetDeviceBuffer(), W.GetSize(), 1);
 
  432       return TCuda<AFloat>::AddL1RegularizationGradients(mA, mW, weightDecay);
 
  448    static void InitializeGauss(Matrix_t &A);
 
  449    static void InitializeUniform(Matrix_t &A);
 
  450    static void InitializeIdentity(Matrix_t &A);
 
  451    static void InitializeZero(Matrix_t &A);
 
  452    static void InitializeGlorotNormal(Matrix_t &A);
 
  453    static void InitializeGlorotUniform(Matrix_t &A);
 
  457    static TRandom &GetRandomGenerator();
 
  460    static void SetRandomSeed(
size_t seed);
 
  474    static void Dropout(Tensor_t &A, Scalar_t p) {}
 
  488    static void AddConvBiases(Matrix_t &output, 
const Matrix_t &biases);
 
  492    static void PrepareInternals(Tensor_t &) {}
 
  495    static void ConvLayerForward(Tensor_t &output,
 
  496                                 Tensor_t &inputActivationFunc, 
 
  497                                 const Tensor_t &input, 
const Matrix_t &weights, 
const Matrix_t &biases,
 
  498                                 const DNN::CNN::TConvParams ¶ms, EActivationFunction activFunc,
 
  499                                 Tensor_t & , 
const ConvDescriptors_t &descriptors,
 
  500                                 ConvWorkspace_t &workspace);
 
  516    static void ConvLayerBackward(Tensor_t &activationGradientsBackward, Matrix_t &weightGradients,
 
  517                                  Matrix_t &biasGradients, Tensor_t &inputActivation, Tensor_t &activationGradients,
 
  518                                  const Matrix_t &weights, 
const Tensor_t &activationBackward,
 
  519                                  const Tensor_t &outputTensor, EActivationFunction activFunc,
 
  520                                  const ConvDescriptors_t &descriptors, ConvWorkspace_t &workspace, 
size_t ,
 
  521                                  size_t , 
size_t , 
size_t , 
size_t ,
 
  522                                  size_t , 
size_t , 
size_t ,
 
  538    static void Downsample(Tensor_t &A, Tensor_t & , 
const Tensor_t &C, 
const PoolingDescriptors_t &descriptors,
 
  539                           PoolingWorkspace_t &workspace, 
size_t imgHeight, 
size_t imgWidth, 
size_t fltHeight,
 
  540                           size_t fltWidth, 
size_t strideRows, 
size_t strideCols);
 
  550    static void MaxPoolLayerBackward(Tensor_t &activationGradientsBackward, 
const Tensor_t &activationGradients,
 
  551                                     const Tensor_t & , 
const Tensor_t &inputActivation,
 
  552                                     const Tensor_t &outputTensor, 
const PoolingDescriptors_t &descriptors,
 
  553                                     PoolingWorkspace_t &workspace, 
size_t imgHeight, 
size_t imgWidth, 
size_t fltHeight,
 
  554                                     size_t fltWidth, 
size_t strideRows, 
size_t strideCols, 
size_t nLocalViews);
 
  571    static void Flatten(Tensor_t &A, 
const Tensor_t &B);
 
  575    static void Deflatten(Tensor_t &A, 
const Tensor_t &B); 
 
  578    static void Rearrange(Tensor_t &out, 
const Tensor_t &in) { TCuda<AFloat>::Rearrange(out, in); }
 
  581    static Matrix_t &RecurrentLayerBackward(Matrix_t &state_gradients_backward, 
 
  582                                            Matrix_t & , Matrix_t &,
 
  591       return state_gradients_backward;
 
  610    static void Hadamard(Tensor_t &A, 
const Tensor_t &B)
 
  612       TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), 1, A.GetSize());
 
  613       TCudaMatrix<AFloat> tmpB(B.GetDeviceBuffer(), 1, B.GetSize());
 
  614       assert(A.GetSize() == B.GetSize());
 
  615       TCuda<AFloat>::Hadamard(tmpA, tmpB);
 
  626    static Scalar_t Sum(
const Matrix_t &A, Scalar_t alpha = 1.0, Scalar_t beta = 0.0);
 
  634    static void ConstAdd(Matrix_t &A, Scalar_t beta) {
 
  635       TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
 
  636       TCuda<AFloat>::ConstAdd(tmp,beta);
 
  642    static void ConstMult(Matrix_t &A, Scalar_t beta) {
 
  643       TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
 
  644       TCuda<AFloat>::ConstMult(tmp,beta);
 
  650    static void ReciprocalElementWise(Matrix_t &A) {
 
  651       TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
 
  652       TCuda<AFloat>::ReciprocalElementWise(tmp);
 
  658    static void SquareElementWise(Matrix_t &A) {
 
  659       TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
 
  660       TCuda<AFloat>::SquareElementWise(tmp);
 
  667    static void SqrtElementWise(Matrix_t &A) {
 
  668       TCudaMatrix<AFloat> tmp(A.GetDeviceBuffer(), 1, A.GetSize());
 
  669       TCuda<AFloat>::SqrtElementWise(tmp);
 
  673    static void AdamUpdate(Matrix_t & A, 
const Matrix_t & M, 
const Matrix_t & V, Scalar_t alpha, Scalar_t eps) {
 
  674       TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), A.GetSize(),1);
 
  675       TCudaMatrix<AFloat> tmpM(M.GetDeviceBuffer(), M.GetSize(),1);
 
  676       TCudaMatrix<AFloat> tmpV(V.GetDeviceBuffer(), V.GetSize(),1);
 
  677       TCuda<AFloat>::AdamUpdate(tmpA, tmpM, tmpV,alpha, eps);
 
  679    static void AdamUpdateFirstMom(Matrix_t & A, 
const Matrix_t & B, Scalar_t beta) {
 
  680       TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), A.GetSize(),1);
 
  681       TCudaMatrix<AFloat> tmpB(B.GetDeviceBuffer(), B.GetSize(),1);
 
  682       TCuda<AFloat>::AdamUpdateFirstMom(tmpA, tmpB,  beta);
 
  684    static void AdamUpdateSecondMom(Matrix_t & A, 
const Matrix_t & B, Scalar_t beta) {
 
  685       TCudaMatrix<AFloat> tmpA(A.GetDeviceBuffer(), A.GetSize(),1);
 
  686       TCudaMatrix<AFloat> tmpB(B.GetDeviceBuffer(), B.GetSize(),1);
 
  687       TCuda<AFloat>::AdamUpdateSecondMom(tmpA, tmpB,  beta);
 
  691    static void PrintTensor( 
const Tensor_t & A, 
const std::string name = 
"tensor", 
bool = 
false);
 
  702    static void SumRows(Matrix_t & B, 
const Matrix_t & A);
 
  709 template <
typename AFloat>
 
  710 template <
typename ATensor>
 
  711 void TCudnn<AFloat>::CopyDiffArch(TCudaTensor<AFloat> &B,
 
  718    if (B.GetLayout() == GetTensorLayout() ) {
 
  719       assert(B.GetShape().size() == 4);
 
  720       for (
size_t i = 0; i < A.GetFirstSize(); ++i) {
 
  721          TMatrixT<AFloat> matIn = A.At(i).GetMatrix(); 
 
  723          TCudaTensor<AFloat> tmpOut = B.At(i); 
 
  725          TCudaTensor<AFloat> tmpIn(matIn.GetMatrixArray(), tmpOut.GetShape(), tmpOut.GetLayout());
 
  730       TMatrixT<AFloat> tmp = A;
 
  731       TCudaMatrix<AFloat> tmp2(tmp);
 
  732       TCudaTensor<AFloat> tA(tmp2);
 
  738 template <
typename AFloat>
 
  739 template <
typename AMatrix>
 
  740 void TCudnn<AFloat>::CopyWeightsDiffArch(TCudaTensor<AFloat> &B, 
const  AMatrix &A)
 
  744    TMatrixT<AFloat> tmp = A; 
 
  746    if (B.GetLayout() == GetTensorLayout()  ) {
 
  748       assert(B.GetShape().size() == 4);  
 
  751    TCudaMatrix<AFloat> tmp2(tmp);
 
  752    TCudaTensor<AFloat> tA(tmp2);
 
  757 template <
typename AFloat>
 
  758 template <
typename AMatrix_t>
 
  759 void TCudnn<AFloat>::CopyDiffArch(std::vector<Tensor_t> &B,
 
  760                             const std::vector<AMatrix_t> &A)
 
  762    for (
size_t i = 0; i < B.size(); ++i) {
 
  763       CopyWeightsDiffArch(B[i], A[i]);
 
  767 template <
typename AFloat>
 
  768 void TCudnn<AFloat>::PrintTensor(
const typename TCudnn<AFloat>::Tensor_t & A, 
const std::string name, 
bool truncate )
 
  770    std::cout << name << 
"  size = " << A.GetSize() << 
" shape = { ";
 
  771    auto shape = A.GetShape();
 
  772    for (
size_t k = 0; k < shape.size()-1; ++k)
 
  773       std::cout << shape[k] << 
" , ";
 
  774    std::cout << shape.back() << 
" } ";
 
  775    std::cout << 
" strides = { ";
 
  776    auto strides = A.GetStrides();
 
  777    for (
size_t k = 0; k < strides.size()-1; ++k)
 
  778       std::cout << strides[k] << 
" , ";
 
  779    std::cout << strides.back() << 
" }\n ";
 
  781    if (A.GetShape().size() == 2 ) {
 
  782       for (
size_t i = 0; i < A.GetShape()[0]; ++i) {
 
  784          size_t n =  A.GetShape()[1];
 
  785          if (truncate) n = std::min(n,
size_t(10));
 
  786          for (
size_t j = 0; j < n; ++j) {
 
  787             std::cout << A(i,j) << 
" ";
 
  790          if (truncate && n < A.GetShape()[1]) std::cout << 
" ...... ";
 
  791          std::cout << 
" } " << std::endl;
 
  793    } 
else if  (A.GetShape().size() == 3 ) {
 
  794       for (
size_t i = 0; i < A.GetFirstSize(); ++i) {
 
  796          for (
size_t j = 0; j < A.GetHSize(); ++j) {
 
  798             size_t n =  A.GetWSize();
 
  799             if (truncate)  n = std::min(n,
size_t(10));
 
  800             for (
size_t k = 0; k < n; ++k) {
 
  801                std::cout << A(i,j,k) << 
" ";
 
  803             if (truncate && n < A.GetWSize()) std::cout << 
" ...... ";
 
  804             std::cout << 
" } " << std::endl;
 
  806          std::cout << 
" } " << std::endl;
 
  808    } 
else if  (A.GetShape().size() == 4 ) {
 
  809       for (
size_t i = 0; i < A.GetShape()[0]; ++i) {
 
  811          for (
size_t j = 0; j < A.GetShape()[1]; ++j) {
 
  813             for (
size_t k = 0; k < A.GetShape()[2]; ++k) {
 
  814                size_t n =  A.GetShape()[3];
 
  815                if (truncate)  n = std::min(n,
size_t(10));
 
  816                for (
size_t l = 0; l < n; ++l) {
 
  817                   std::cout << A(i,j,k,l) << 
" ";
 
  819                if (truncate && n < A.GetShape()[3]) std::cout << 
" ...... ";
 
  820                std::cout << 
" } " << std::endl;
 
  822             std::cout << 
" } " << std::endl;
 
  824          std::cout << 
" } " << std::endl;
 
  828       for (
size_t l = 0; l < A.GetSize(); ++l) {
 
  829          std::cout << A.GetData()[l] << 
" ";
 
// Out-of-class definitions of the CNNOptions static members.
// A value of -1 means "not set": the cuDNN algorithm (and workspace limit)
// is then chosen automatically at descriptor/workspace initialization time.

// Convolution forward algorithm (cudnnConvolutionFwdAlgo_t as int).
template <typename AFloat>
int TCudnn<AFloat>::CNNOptions::ConvFwdAlgorithm = -1;
// Convolution backward-data algorithm (cudnnConvolutionBwdDataAlgo_t as int).
template <typename AFloat>
int TCudnn<AFloat>::CNNOptions::ConvBwdDataAlgorithm = -1;
// Convolution backward-filter algorithm (cudnnConvolutionBwdFilterAlgo_t as int).
template <typename AFloat>
int TCudnn<AFloat>::CNNOptions::ConvBwdFilterAlgorithm = -1;
// Maximum workspace size in bytes allowed for the convolution algorithms.
template <typename AFloat>
Long_t TCudnn<AFloat>::CNNOptions::ConvMaxWorkspaceSize = -1;