template<typename Architecture_t, typename Layer_t = TLayer<Architecture_t>>
class TNet
{

public:
   using Matrix_t        = typename Architecture_t::Matrix_t;
   using Scalar_t        = typename Architecture_t::Scalar_t;
   using LayerIterator_t = typename std::vector<Layer_t>::iterator;

private:
   size_t fBatchSize;            ///< Batch size used for training and evaluation.
   size_t fInputWidth;           ///< Number of features in a single input event.

   std::vector<Layer_t> fLayers; ///< Layers in the network.

   Matrix_t fDummy;              ///< Empty matrix for the last step in backpropagation.
   ELossFunction fJ;             ///< The loss function of the network.
   ERegularization fR;           ///< The regularization scheme applied to the network.
   Scalar_t fWeightDecay;        ///< The weight-decay factor.

public:
   TNet();
   TNet(const TNet & other);
   template<typename OtherArchitecture_t>
   TNet(size_t batchSize, const TNet<OtherArchitecture_t> &);
   TNet(size_t batchSize,
        size_t inputWidth,
        ELossFunction J,
        ERegularization R = ERegularization::kNone,
        Scalar_t weightDecay = 0.0);

   /*! Create a clone that uses the same weight and bias matrices but
    *  potentially a different batch size. */
   TNet<Architecture_t, TSharedLayer<Architecture_t>> CreateClone(size_t batchSize);

   /*! Add a fully connected layer of the given width to the network. */
   void AddLayer(size_t width, EActivationFunction f,
                 Scalar_t dropoutProbability = 1.0);

   /*! Remove all layers from the network. */
   void Clear();

   /*! Add a layer that shares its weights with another network. */
   template <typename SharedLayer_t>
   void AddLayer(SharedLayer_t & layer);
 
   /*! Iterator to the first layer in the net. */
   LayerIterator_t LayersBegin() {return fLayers.begin();}

   /*! Iterator past the last layer in the net. */
   LayerIterator_t LayersEnd()   {return fLayers.end();}
 
   /*! Initialize the weights in the net with the given initialization method. */
   inline void Initialize(EInitialization m);

   /*! Initialize the gradients in the net to zero. Required if the net is
    *  used to store velocities of momentum-based minimization techniques. */
   inline void InitializeGradients();

   /*! Forward a given input through the neural net. Computes all layer
    *  activations up to the output layer. */
   inline void Forward(Matrix_t &X, bool applyDropout = false);

   /*! Compute the weight gradients in the net from the given training
    *  samples X and training labels Y. */
   inline void Backward(const Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights);

   /*! Evaluate the loss function of the net using the activations that are
    *  currently stored in the output layer. */
   inline Scalar_t Loss(const Matrix_t &Y, const Matrix_t &weights,
                        bool includeRegularization = true) const;

   /*! Propagate the input batch X through the net and evaluate the loss
    *  function for the resulting activations of the output layer. */
   inline Scalar_t Loss(Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights,
                        bool applyDropout = false, bool includeRegularization = true);

   /*! Compute the neural network prediction obtained from forwarding the
    *  batch X through the network and applying the output function f to the
    *  activation of the last layer. */
   inline void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f);

   /*! Compute the neural network prediction obtained from applying the
    *  output function f to the activation of the last layer. */
   inline void Prediction(Matrix_t &Y_hat, EOutputFunction f) const;

   /*! Estimate the number of floating-point operations performed in one
    *  forward and backward pass through the network. */
   Scalar_t            GetNFlops();

   size_t              GetDepth() const          {return fLayers.size();}
   size_t              GetBatchSize() const      {return fBatchSize;}
   Layer_t &           GetLayer(size_t i)        {return fLayers[i];}
   const Layer_t &     GetLayer(size_t i) const  {return fLayers[i];}
   ELossFunction       GetLossFunction() const   {return fJ;}
   Matrix_t &          GetOutput()               {return fLayers.back().GetOutput();}
   size_t              GetInputWidth() const     {return fInputWidth;}
   size_t              GetOutputWidth() const    {return fLayers.back().GetWidth();}
   ERegularization     GetRegularization() const {return fR;}
   Scalar_t            GetWeightDecay() const    {return fWeightDecay;}

   void SetBatchSize(size_t batchSize)       {fBatchSize = batchSize;}
   void SetInputWidth(size_t inputWidth)     {fInputWidth = inputWidth;}
   void SetRegularization(ERegularization R) {fR = R;}
   void SetLossFunction(ELossFunction J)     {fJ = J;}
   void SetWeightDecay(Scalar_t weightDecay) {fWeightDecay = weightDecay;}
   void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);

   void Print();
};
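
/* Example usage (illustrative sketch; it assumes the reference backend
 * TReference<double> from "TMVA/DNN/Architectures/Reference.h" and the enum
 * values named below):
 *
 *    TNet<TReference<double>> net(32, 4, ELossFunction::kMeanSquaredError,
 *                                 ERegularization::kL2, 1e-4);
 *    net.AddLayer(8, EActivationFunction::kTanh);
 *    net.AddLayer(1, EActivationFunction::kIdentity);
 *    net.Initialize(EInitialization::kGauss);
 */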
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet()
   : fBatchSize(0), fInputWidth(0), fLayers(), fDummy(0,0),
     fJ(ELossFunction::kMeanSquaredError), fR(ERegularization::kNone),
     fWeightDecay(0.0)
{
   // Nothing to do here.
}
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(const TNet & other)
   : fBatchSize(other.fBatchSize), fInputWidth(other.fInputWidth),
     fLayers(other.fLayers), fDummy(0,0), fJ(other.fJ), fR(other.fR),
     fWeightDecay(other.fWeightDecay)
{
   // Nothing to do here.
}
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
template<typename OtherArchitecture_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    const TNet<OtherArchitecture_t> & other)
   : fBatchSize(batchSize), fInputWidth(other.GetInputWidth()), fLayers(),
     fDummy(0,0), fJ(other.GetLossFunction()), fR(other.GetRegularization()),
     fWeightDecay(other.GetWeightDecay())
{
   fLayers.reserve(other.GetDepth());
   for (size_t i = 0; i < other.GetDepth(); i++) {
      AddLayer(other.GetLayer(i).GetWidth(),
               other.GetLayer(i).GetActivationFunction(),
               other.GetLayer(i).GetDropoutProbability());
      // Copy weights and biases across backends through the
      // architecture-independent TMatrixT representation.
      fLayers[i].GetWeights() = (TMatrixT<Scalar_t>) other.GetLayer(i).GetWeights();
      fLayers[i].GetBiases()  = (TMatrixT<Scalar_t>) other.GetLayer(i).GetBiases();
   }
}
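
/* Example (sketch): this conversion constructor can copy a net from one
 * backend to another, e.g. from the reference backend to a CUDA backend,
 * assuming both architecture types are available in the build:
 *
 *    TNet<TReference<double>> hostNet(32, 4, ELossFunction::kCrossEntropy);
 *    // ... add layers and train ...
 *    TNet<TCuda<float>> deviceNet(1024, hostNet);
 */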
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(size_t          batchSize,
                                    size_t          inputWidth,
                                    ELossFunction   J,
                                    ERegularization R,
                                    Scalar_t        weightDecay)
   : fBatchSize(batchSize), fInputWidth(inputWidth), fLayers(), fDummy(0,0),
     fJ(J), fR(R), fWeightDecay(weightDecay)
{
   // Nothing to do here.
}
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::CreateClone(size_t batchSize)
   -> TNet<Architecture_t, TSharedLayer<Architecture_t>>
{
   TNet<Architecture_t, TSharedLayer<Architecture_t>> other(batchSize, fInputWidth,
                                                            fJ, fR, fWeightDecay);
   for (auto &l : fLayers) {
      other.AddLayer(l);
   }
   return other;
}
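
/* Example (sketch): clones share their weight and bias matrices with this
 * net, so they can serve e.g. as per-thread evaluation nets with a smaller
 * batch size:
 *
 *    auto clone = net.CreateClone(16); // shares weights with net
 *    clone.Forward(X16, false);        // X16 must hold a batch of 16 rows
 */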
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::AddLayer(size_t width,
                                             EActivationFunction f,
                                             Scalar_t dropoutProbability)
{
   if (fLayers.size() == 0) {
      // The first layer connects to the input.
      fLayers.emplace_back(fBatchSize, fInputWidth, width, f, dropoutProbability);
   } else {
      size_t prevWidth = fLayers.back().GetWidth();
      fLayers.emplace_back(fBatchSize, prevWidth, width, f, dropoutProbability);
   }
}
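
/* Example (sketch): the first AddLayer call connects to the input width,
 * each later call to the width of the previous layer. With fInputWidth == 4:
 *
 *    net.AddLayer(8, EActivationFunction::kTanh);     // 8 units on 4 inputs
 *    net.AddLayer(1, EActivationFunction::kIdentity); // 1 unit on 8 inputs
 */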
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::Clear()
{
   fLayers.clear();
}
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
template<typename SharedLayer_t>
inline void TNet<Architecture_t, Layer_t>::AddLayer(SharedLayer_t & layer)
{
   fLayers.emplace_back(fBatchSize, layer);
}
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Initialize(EInitialization m)
{
   for (auto &l : fLayers) {
      l.Initialize(m);
   }
}
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::InitializeGradients()
{
   for (auto &l : fLayers) {
      initialize<Architecture_t>(l.GetWeightGradients(), EInitialization::kZero);
      initialize<Architecture_t>(l.GetBiasGradients(),   EInitialization::kZero);
   }
}
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Forward(Matrix_t &input,
                                                   bool applyDropout)
{
   fLayers.front().Forward(input, applyDropout);
   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i].Forward(fLayers[i-1].GetOutput(), applyDropout);
   }
}
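
/* Example (sketch, reference backend, where Matrix_t is TMatrixT<double>):
 *
 *    Matrix_t X(net.GetBatchSize(), net.GetInputWidth());
 *    // ... fill X with one batch of input events ...
 *    net.Forward(X);                       // no dropout
 *    Matrix_t & output = net.GetOutput();  // output-layer activations
 */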
 
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Backward(const Matrix_t &X,
                                                    const Matrix_t &Y,
                                                    const Matrix_t &weights)
{
   // Gradient of the loss with respect to the output-layer activations.
   evaluateGradients<Architecture_t>(fLayers.back().GetActivationGradients(),
                                     fJ, Y, fLayers.back().GetOutput(), weights);

   // Propagate gradients backward through the hidden layers.
   for (size_t i = fLayers.size()-1; i > 0; i--) {
      auto & activation_gradient_backward = fLayers[i-1].GetActivationGradients();
      auto & activations_backward         = fLayers[i-1].GetOutput();
      fLayers[i].Backward(activation_gradient_backward,
                          activations_backward, fR, fWeightDecay);
   }
   fLayers[0].Backward(fDummy, X, fR, fWeightDecay);
}
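
/* A minimal plain gradient-descent step built on Forward/Backward (sketch;
 * it assumes the backend provides ScaleAdd(A, B, beta), i.e. A += beta * B,
 * as TMVA's minimizers do, and that rate is a learning rate chosen by the
 * caller):
 *
 *    net.Forward(X, true);        // populate layer activations
 *    net.Backward(X, Y, weights); // populate weight and bias gradients
 *    for (size_t i = 0; i < net.GetDepth(); i++) {
 *       auto & layer = net.GetLayer(i);
 *       Architecture_t::ScaleAdd(layer.GetWeights(), layer.GetWeightGradients(), -rate);
 *       Architecture_t::ScaleAdd(layer.GetBiases(),  layer.GetBiasGradients(),  -rate);
 *    }
 */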
 
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(const Matrix_t &Y,
                                                const Matrix_t &weights,
                                                bool includeRegularization) const
   -> Scalar_t
{
   auto loss = evaluate<Architecture_t>(fJ, Y, fLayers.back().GetOutput(), weights);
   includeRegularization &= (fR != ERegularization::kNone);
   if (includeRegularization) {
      for (auto &l : fLayers) {
         loss += fWeightDecay * regularization<Architecture_t>(l.GetWeights(), fR);
      }
   }
   return loss;
}
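
/* With regularization enabled, the value returned above is (sketch)
 *
 *    loss = J(Y, Y_hat, weights) + fWeightDecay * sum_l R(W_l),
 *
 * where J is the loss function selected by fJ, Y_hat the output-layer
 * activations, and R the regularization functional selected by fR. */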
 
//______________________________________________________________________________
template <typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(Matrix_t &X, const Matrix_t &Y,
                                                const Matrix_t &weights,
                                                bool applyDropout,
                                                bool includeRegularization)
   -> Scalar_t
{
   Forward(X, applyDropout);
   return Loss(Y, weights, includeRegularization);
}
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Yhat,
                                                      Matrix_t &X,
                                                      EOutputFunction f)
{
   Forward(X, false);
   evaluate<Architecture_t>(Yhat, f, fLayers.back().GetOutput());
}
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Y_hat,
                                                      EOutputFunction f) const
{
   evaluate<Architecture_t>(Y_hat, f, fLayers.back().GetOutput());
}
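
/* Example (sketch): obtain sigmoid-transformed predictions for a batch X:
 *
 *    Matrix_t Y_hat(net.GetBatchSize(), net.GetOutputWidth());
 *    net.Prediction(Y_hat, X, EOutputFunction::kSigmoid);
 */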
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::GetNFlops()
   -> Scalar_t
{
   Scalar_t flops = 0;

   Scalar_t nb  = (Scalar_t) fBatchSize;
   Scalar_t nlp = (Scalar_t) fInputWidth;

   for (size_t i = 0; i < fLayers.size(); i++) {
      Layer_t & layer = fLayers[i];
      Scalar_t nl = (Scalar_t) layer.GetWidth();

      // Forward propagation.
      flops += nb * nl * (2.0 * nlp - 1); // Matrix multiplication.
      flops += nb * nl;                   // Add bias values.
      flops += 2 * nb * nl;               // Apply activation function and
                                          // compute its derivative.
      // Backward propagation.
      flops += nb * nl;                      // Hadamard product.
      flops += nlp * nl * (2.0 * nb - 1.0);  // Weight gradients.
      flops += nl * (nb - 1);                // Bias gradients.
      if (i > 0) {
         flops += nlp * nb * (2.0 * nl - 1.0); // Gradients of previous layer.
      }
      nlp = nl;
   }
   return flops;
}
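
/* Worked example (sketch): for a single layer with fBatchSize = 32,
 * fInputWidth = 4 and width 8, the forward matrix multiplication alone
 * contributes nb * nl * (2 * nlp - 1) = 32 * 8 * 7 = 1792 flops. */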
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::SetDropoutProbabilities(
    const std::vector<Double_t> & probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i].SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i].SetDropoutProbability(1.0);
      }
   }
}
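
/* Example (sketch): keep 50% of the activations in the first layer and 80%
 * in the second; layers without an entry fall back to 1.0 (no dropout):
 *
 *    net.SetDropoutProbabilities({0.5, 0.8});
 */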
 
//______________________________________________________________________________
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::Print()
{
   std::cout << "DEEP NEURAL NETWORK:";
   std::cout << " Loss function = " << static_cast<char>(fJ);
   std::cout << ", Depth = " << fLayers.size() << std::endl;

   size_t i = 1;
   for (auto & l : fLayers) {
      std::cout << "DNN Layer " << i << ":" << std::endl;
      l.Print();
      i++;
   }
}