#ifndef TMVA_DNN_DLMINIMIZERS
#define TMVA_DNN_DLMINIMIZERS

#include "TMVA/DNN/DeepNet.h"
#include "TMVA/DNN/TensorDataLoader.h"

#include <limits>
#include <vector>

namespace TMVA {
namespace DNN {
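
/** \class TDLGradientDescent
 *
 *  Gradient-descent minimizer for TDeepNet objects, templated on the
 *  architecture backend. A single step propagates a batch forward through the
 *  net, backpropagates the gradients and updates the weights and biases with a
 *  fixed learning rate; convergence is declared once the test error has failed
 *  to improve by more than 0.1% for fConvergenceSteps steps.
 *
 *  A minimal usage sketch; `Architecture_t` stands for a backend type, and
 *  `net`, `input`, `output`, `weights` and the helper `ComputeTestError` are
 *  hypothetical user-provided objects, not part of this header:
 *
 *  \code
 *  TDLGradientDescent<Architecture_t> minimizer(0.001, 10, 5);
 *  minimizer.Reset();
 *  bool converged = false;
 *  while (!converged) {
 *     minimizer.Step(net, input, output, weights);               // one batch update
 *     converged = minimizer.HasConverged(ComputeTestError(net)); // hypothetical test-error evaluation
 *  }
 *  \endcode
 */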
template <typename Architecture_t>
class TDLGradientDescent {
public:
   using DeepNet_t = TDeepNet<Architecture_t>;
   using Scalar_t = typename Architecture_t::Scalar_t;
   using Matrix_t = typename Architecture_t::Matrix_t;

private:
   size_t fBatchSize;        ///< Batch size to use for the training.
   size_t fStepCount;        ///< Number of steps performed in the current training session.
   size_t fConvergenceSteps; ///< Number of steps without test-error improvement required for convergence.
   size_t fConvergenceCount; ///< Current number of steps without considerable test-error improvement.
   size_t fTestInterval;     ///< Interval for the computation of the test error.
   Scalar_t fTrainingError;  ///< Most recently computed training loss.
   Scalar_t fTestError;      ///< Most recently computed test loss.
   Scalar_t fLearningRate;   ///< Learning rate used for the weight updates.
   Scalar_t fMinimumError;   ///< Minimum test error observed so far (convergence reference).

public:
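
   /** Construct the minimizer either with all parameters zero-initialized, or
       with the given learning rate, convergence-step count and test-error
       interval. */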
   TDLGradientDescent();
   TDLGradientDescent(Scalar_t learningRate, size_t convergenceSteps, size_t testInterval);
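
   /** Reset the minimizer to its initial state: no steps taken, no convergence
       progress, and an infinite minimum error. */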
   void Reset()
   {
      fMinimumError = std::numeric_limits<Scalar_t>::infinity();
      fConvergenceCount = 0;
      fStepCount = 0;
   }
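
   /** Perform a single optimization step on the given batch: propagate the
       input forward through the net, backpropagate the gradients, and update
       the weights and biases of each layer with the learning rate. */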
   void Step(DeepNet_t &deepNet, std::vector<Matrix_t> &input, const Matrix_t &output, const Matrix_t &weights);
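
   /** Variant of Step(...) that updates each layer directly from its stored
       weight and bias gradients instead of calling Update() on the net. */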
   void StepReducedWeights(DeepNet_t &deepNet, std::vector<Matrix_t> &input, const Matrix_t &output,
                           const Matrix_t &weights);
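
   /** Evaluate the loss on the given batch, then backpropagate and update the
       net; returns the loss. */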
   Scalar_t StepLoss(DeepNet_t &deepNet, std::vector<Matrix_t> &input, const Matrix_t &output,
                     const Matrix_t &weights);
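
   /** Same as StepReducedWeights(...), but also evaluate the loss, store it in
       fTrainingError and return it. */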
   Scalar_t StepReducedWeightsLoss(DeepNet_t &deepNet, std::vector<Matrix_t> &input, const Matrix_t &output,
                                   const Matrix_t &weights);
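
   /** Perform one optimization step on several batches in parallel: forward
       and backward propagation over the replica nets in \p nets are delegated
       to the master net, which collects the updates. */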
   void Step(DeepNet_t &master, std::vector<DeepNet_t> &nets, std::vector<TTensorBatch<Architecture_t>> &batches);
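
   /** Same as the multi-batch Step(...), but the update uses momentum. */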
   void StepMomentum(DeepNet_t &master, std::vector<DeepNet_t> &nets,
                     std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t momentum);
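
   /** Same as the multi-batch Step(...), but the update uses Nesterov momentum. */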
   void StepNesterov(DeepNet_t &master, std::vector<DeepNet_t> &nets,
                     std::vector<TTensorBatch<Architecture_t>> &batches, Scalar_t momentum);
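
   /** Convergence check: whenever the current test error improves on the best
       value seen so far by more than 0.1%, the convergence counter is reset and
       the best value updated; otherwise the counter is increased. The overload
       taking \p testError records it as the current test error first. Returns
       true once the counter reaches fConvergenceSteps. */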
   bool HasConverged();
   bool HasConverged(Scalar_t testError);

   size_t GetConvergenceCount() const { return fConvergenceCount; }
   size_t GetConvergenceSteps() const { return fConvergenceSteps; }
   Scalar_t GetTrainingError() const { return fTrainingError; }
   Scalar_t GetTestError() const { return fTestError; }
   size_t GetTestInterval() const { return fTestInterval; }

   void SetConvergenceSteps(size_t steps) { fConvergenceSteps = steps; }
   void SetTestInterval(size_t interval) { fTestInterval = interval; }
   void SetLearningRate(Scalar_t rate) { fLearningRate = rate; }
   void SetBatchSize(size_t batchSize) { fBatchSize = batchSize; }
};

// Implementation

template <typename Architecture_t>
TDLGradientDescent<Architecture_t>::TDLGradientDescent()
   : fBatchSize(0), fStepCount(0), fConvergenceSteps(0), fConvergenceCount(0), fTestInterval(0), fTrainingError(0),
     fTestError(0), fLearningRate(0), fMinimumError(std::numeric_limits<Scalar_t>::infinity())
{
   // Nothing to do here.
}

template <typename Architecture_t>
TDLGradientDescent<Architecture_t>::TDLGradientDescent(Scalar_t learningRate, size_t convergenceSteps,
                                                        size_t testInterval)
   : fBatchSize(0), fStepCount(0), fConvergenceSteps(convergenceSteps), fConvergenceCount(0),
     fTestInterval(testInterval), fTrainingError(0), fTestError(0), fLearningRate(learningRate),
     fMinimumError(std::numeric_limits<Scalar_t>::infinity())
{
   // Nothing to do here.
}

template <typename Architecture_t>
void TDLGradientDescent<Architecture_t>::Step(DeepNet_t &deepNet, std::vector<Matrix_t> &input,
                                              const Matrix_t &output, const Matrix_t &weights)
{
   // Forward and backward pass through the net, then apply the gradient update.
   deepNet.Forward(input, true);
   deepNet.Backward(input, output, weights);
   deepNet.Update(fLearningRate);
}

template <typename Architecture_t>
void TDLGradientDescent<Architecture_t>::StepReducedWeights(DeepNet_t &deepNet, std::vector<Matrix_t> &input,
                                                            const Matrix_t &output, const Matrix_t &weights)
{
   deepNet.Forward(input, true);
   deepNet.Backward(input, output, weights);

   for (size_t i = 0; i < deepNet.GetDepth(); i++) {
      auto *layer = deepNet.GetLayerAt(i);

      layer->UpdateWeights(layer->GetWeightGradients(), fLearningRate);
      layer->UpdateBiases(layer->GetBiasGradients(), fLearningRate);
   }
}

template <typename Architecture_t>
auto TDLGradientDescent<Architecture_t>::StepLoss(DeepNet_t &deepNet, std::vector<Matrix_t> &input,
                                                  const Matrix_t &output, const Matrix_t &weights) -> Scalar_t
{
   // Evaluate the loss, backpropagate and update the net.
   Scalar_t loss = deepNet.Loss(input, output);
   deepNet.Backward(input, output, weights);
   deepNet.Update(fLearningRate);

   return loss;
}

template <typename Architecture_t>
auto TDLGradientDescent<Architecture_t>::StepReducedWeightsLoss(DeepNet_t &deepNet, std::vector<Matrix_t> &input,
                                                                const Matrix_t &output, const Matrix_t &weights)
   -> Scalar_t
{
   // Evaluate and record the loss, backpropagate, then update each layer from its gradients.
   Scalar_t loss = deepNet.Loss(input, output);
   fTrainingError = loss;
   deepNet.Backward(input, output, weights);

   for (size_t i = 0; i < deepNet.GetDepth(); i++) {
      auto *layer = deepNet.GetLayerAt(i);

      layer->UpdateWeights(layer->GetWeightGradients(), fLearningRate);
      layer->UpdateBiases(layer->GetBiasGradients(), fLearningRate);
   }

   return loss;
}

template <typename Architecture_t>
void TDLGradientDescent<Architecture_t>::Step(DeepNet_t &master, std::vector<DeepNet_t> &nets,
                                              std::vector<TTensorBatch<Architecture_t>> &batches)
{
   master.ParallelForward(nets, batches);
   master.ParallelBackward(nets, batches, fLearningRate);
}

template <typename Architecture_t>
void TDLGradientDescent<Architecture_t>::StepMomentum(DeepNet_t &master, std::vector<DeepNet_t> &nets,
                                                      std::vector<TTensorBatch<Architecture_t>> &batches,
                                                      Scalar_t momentum)
{
   master.ParallelForward(nets, batches);
   master.ParallelBackwardMomentum(nets, batches, fLearningRate, momentum);
}

template <typename Architecture_t>
void TDLGradientDescent<Architecture_t>::StepNesterov(DeepNet_t &master, std::vector<DeepNet_t> &nets,
                                                      std::vector<TTensorBatch<Architecture_t>> &batches,
                                                      Scalar_t momentum)
{
   master.ParallelForward(nets, batches);
   master.ParallelBackwardNestorov(nets, batches, fLearningRate, momentum);
}

template <typename Architecture_t>
bool TDLGradientDescent<Architecture_t>::HasConverged()
{
   if (fTestError < fMinimumError * 0.999) {
      fConvergenceCount = 0;
      fMinimumError = fTestError;
   } else {
      fConvergenceCount++;
   }

   return (fConvergenceCount >= fConvergenceSteps);
}

template <typename Architecture_t>
bool TDLGradientDescent<Architecture_t>::HasConverged(Scalar_t testError)
{
   fTestError = testError;
   if (fTestError < fMinimumError * 0.999) {
      fConvergenceCount = 0;
      fMinimumError = fTestError;
   } else {
      fConvergenceCount += fTestInterval;
   }

   return (fConvergenceCount >= fConvergenceSteps);
}

} // namespace DNN
} // namespace TMVA

#endif // TMVA_DNN_DLMINIMIZERS