27 #ifndef TMVA_DNN_DENSELAYER
28 #define TMVA_DNN_DENSELAYER
56 template <
typename Architecture_t>
57 class TDenseLayer :
public VGeneralLayer<Architecture_t> {
60 using Scalar_t =
typename Architecture_t::Scalar_t;
61 using Matrix_t =
typename Architecture_t::Matrix_t;
62 using Tensor_t =
typename Architecture_t::Tensor_t;
66 Tensor_t fInputActivation;
67 Tensor_t fDerivatives;
69 Scalar_t fDropoutProbability;
71 EActivationFunction fF;
73 Scalar_t fWeightDecay;
75 typename Architecture_t::ActivationDescriptor_t fActivationDesc;
79 TDenseLayer(
size_t BatchSize,
size_t InputWidth,
size_t Width, EInitialization init, Scalar_t DropoutProbability,
80 EActivationFunction f, ERegularization reg, Scalar_t weightDecay);
83 TDenseLayer(TDenseLayer<Architecture_t> *layer);
86 TDenseLayer(
const TDenseLayer &);
96 void Forward(Tensor_t &input,
bool applyDropout =
false);
102 void Backward(Tensor_t &gradients_backward,
const Tensor_t &activations_backward );
109 virtual void AddWeightsXMLTo(
void *parent);
112 virtual void ReadWeightsFromXML(
void *parent);
115 virtual void SetDropoutProbability(Scalar_t dropoutProbability) { fDropoutProbability = dropoutProbability; }
118 Scalar_t GetDropoutProbability()
const {
return fDropoutProbability; }
121 const Tensor_t &GetInputActivation()
const {
return fInputActivation; }
122 Tensor_t &GetInputActivation() {
return fInputActivation; }
124 EActivationFunction GetActivationFunction()
const {
return fF; }
125 ERegularization GetRegularization()
const {
return fReg; }
126 Scalar_t GetWeightDecay()
const {
return fWeightDecay; }
133 template <
typename Architecture_t>
134 TDenseLayer<Architecture_t>::TDenseLayer(
size_t batchSize,
size_t inputWidth,
size_t width, EInitialization init,
135 Scalar_t dropoutProbability, EActivationFunction f, ERegularization reg,
136 Scalar_t weightDecay)
137 : VGeneralLayer<Architecture_t>(batchSize, 1, 1, inputWidth, 1, 1, width, 1, width, inputWidth, 1, width, 1, 1,
138 batchSize, width, init),
139 fInputActivation(), fDropoutProbability(dropoutProbability), fF(f), fReg(reg), fWeightDecay(weightDecay)
142 fInputActivation = Tensor_t ( this->GetOutput().GetShape() );
143 fDerivatives = Tensor_t ( this->GetOutput().GetShape() );
145 Architecture_t::InitializeActivationDescriptor(fActivationDesc,fF);
149 template <
typename Architecture_t>
150 TDenseLayer<Architecture_t>::TDenseLayer(TDenseLayer<Architecture_t> *layer) :
151 VGeneralLayer<Architecture_t>(layer),
152 fInputActivation( layer->GetInputActivation().GetShape() ),
153 fDropoutProbability(layer->GetDropoutProbability()),
154 fF(layer->GetActivationFunction()), fReg(layer->GetRegularization()), fWeightDecay(layer->GetWeightDecay())
156 fDerivatives = Tensor_t ( this->GetOutput().GetShape() );
157 Architecture_t::InitializeActivationDescriptor(fActivationDesc,fF);
161 template <
typename Architecture_t>
162 TDenseLayer<Architecture_t>::TDenseLayer(
const TDenseLayer &layer) :
163 VGeneralLayer<Architecture_t>(layer),
164 fInputActivation( layer->GetInputActivation()),
165 fDropoutProbability(layer.fDropoutProbability),
166 fF(layer.fF), fReg(layer.fReg), fWeightDecay(layer.fWeightDecay)
168 fDerivatives = Tensor_t ( this->GetOutput().GetShape() );
169 Architecture_t::InitializeActivationDescriptor(fActivationDesc,fF);
173 template <
typename Architecture_t>
174 TDenseLayer<Architecture_t>::~TDenseLayer()
177 Architecture_t::ReleaseDescriptor(fActivationDesc);
184 template <
typename Architecture_t>
185 auto TDenseLayer<Architecture_t>::Forward( Tensor_t &input,
bool applyDropout) ->
void
187 if (applyDropout && (this->GetDropoutProbability() != 1.0)) {
189 Architecture_t::DropoutForward(input, static_cast<TDescriptors *> (
nullptr),
190 static_cast<TWorkspace *> (
nullptr),
191 this->GetDropoutProbability());
193 Architecture_t::MultiplyTranspose(this->GetOutput() , input, this->GetWeightsAt(0));
194 Architecture_t::AddRowWise(this->GetOutput(), this->GetBiasesAt(0));
197 Architecture_t::Copy(this->GetInputActivation(),this->GetOutput());
199 Architecture_t::ActivationFunctionForward(this->GetOutput(), this->GetActivationFunction(), fActivationDesc);
203 template <
typename Architecture_t>
204 auto TDenseLayer<Architecture_t>::Backward(Tensor_t &gradients_backward,
const Tensor_t &activations_backward) ->
void
209 if (this->GetDropoutProbability() != 1.0) {
210 Architecture_t::DropoutBackward(this->GetActivationGradients(),
211 static_cast<TDescriptors *> (
nullptr),
212 static_cast<TWorkspace *> (
nullptr));
215 Architecture_t::ActivationFunctionBackward(fDerivatives, this->GetOutput(),
216 this->GetActivationGradients(), this->GetInputActivation(),
217 this->GetActivationFunction(), fActivationDesc);
219 Architecture_t::Backward(gradients_backward, this->GetWeightGradientsAt(0), this->GetBiasGradientsAt(0),
220 fDerivatives, this->GetActivationGradients(), this->GetWeightsAt(0),
221 activations_backward);
223 addRegularizationGradients<Architecture_t>(this->GetWeightGradientsAt(0), this->GetWeightsAt(0),
224 this->GetWeightDecay(), this->GetRegularization());
228 template <
typename Architecture_t>
229 void TDenseLayer<Architecture_t>::Print()
const
231 std::cout <<
" DENSE Layer: \t";
232 std::cout <<
" ( Input =" << std::setw(6) << this->GetWeightsAt(0).GetNcols();
233 std::cout <<
" , Width =" << std::setw(6) << this->GetWeightsAt(0).GetNrows() <<
" ) ";
235 std::cout <<
"\tOutput = ( " << std::setw(2) << this->GetOutput().GetFirstSize() <<
" ," << std::setw(6) << this->GetOutput().GetShape()[0] <<
" ," << std::setw(6) << this->GetOutput().GetShape()[1] <<
" ) ";
237 std::vector<std::string> activationNames = {
"Identity",
"Relu",
"Sigmoid",
"Tanh",
"SymmRelu",
"SoftSign",
"Gauss" };
238 std::cout <<
"\t Activation Function = ";
239 std::cout << activationNames[ static_cast<int>(fF) ];
240 if (fDropoutProbability != 1.) std::cout <<
"\t Dropout prob. = " << fDropoutProbability;
241 std::cout << std::endl;
246 template <
typename Architecture_t>
247 void TDenseLayer<Architecture_t>::AddWeightsXMLTo(
void *parent)
251 auto layerxml = gTools().xmlengine().NewChild(parent, 0,
"DenseLayer");
253 gTools().xmlengine().NewAttr(layerxml, 0,
"Width", gTools().StringFromInt(this->GetWidth()));
255 int activationFunction =
static_cast<int>(
this -> GetActivationFunction());
256 gTools().xmlengine().NewAttr(layerxml, 0,
"ActivationFunction",
257 TString::Itoa(activationFunction, 10));
259 this->WriteMatrixToXML(layerxml,
"Weights",
this -> GetWeightsAt(0));
260 this->WriteMatrixToXML(layerxml,
"Biases",
this -> GetBiasesAt(0));
264 template <
typename Architecture_t>
265 void TDenseLayer<Architecture_t>::ReadWeightsFromXML(
void *parent)
268 this->ReadMatrixXML(parent,
"Weights",
this -> GetWeightsAt(0));
269 this->ReadMatrixXML(parent,
"Biases",
this -> GetBiasesAt(0));