43 template <
typename Architecture_t,
typename Layer_t = VGeneralLayer<Architecture_t>,
44 typename DeepNet_t = TDeepNet<Architecture_t, Layer_t>>
45 class TSGD :
public VOptimizer<Architecture_t, Layer_t, DeepNet_t> {
47 using Matrix_t =
typename Architecture_t::Matrix_t;
48 using Scalar_t =
typename Architecture_t::Scalar_t;
52 std::vector<std::vector<Matrix_t>>
54 std::vector<std::vector<Matrix_t>>
58 void UpdateWeights(
size_t layerIndex, std::vector<Matrix_t> &weights,
const std::vector<Matrix_t> &weightGradients);
61 void UpdateBiases(
size_t layerIndex, std::vector<Matrix_t> &biases,
const std::vector<Matrix_t> &biasGradients);
65 TSGD(Scalar_t learningRate, DeepNet_t &deepNet, Scalar_t momentum);
71 Scalar_t GetMomentum()
const {
return fMomentum; }
73 std::vector<std::vector<Matrix_t>> &GetPastWeightGradients() {
return fPastWeightGradients; }
74 std::vector<Matrix_t> &GetPastWeightGradientsAt(
size_t i) {
return fPastWeightGradients[i]; }
76 std::vector<std::vector<Matrix_t>> &GetPastBiasGradients() {
return fPastBiasGradients; }
77 std::vector<Matrix_t> &GetPastBiasGradientsAt(
size_t i) {
return fPastBiasGradients[i]; }
84 template <
typename Architecture_t,
typename Layer_t,
typename DeepNet_t>
85 TSGD<Architecture_t, Layer_t, DeepNet_t>::TSGD(Scalar_t learningRate, DeepNet_t &deepNet, Scalar_t momentum)
86 : VOptimizer<Architecture_t, Layer_t, DeepNet_t>(learningRate, deepNet), fMomentum(momentum)
88 std::vector<Layer_t *> &layers = deepNet.GetLayers();
89 size_t layersNSlices = layers.size();
90 fPastWeightGradients.resize(layersNSlices);
91 fPastBiasGradients.resize(layersNSlices);
93 for (
size_t i = 0; i < layersNSlices; i++) {
95 Architecture_t::CreateWeightTensors( fPastWeightGradients[i], layers[i]->GetWeights());
96 size_t weightsNSlices = fPastWeightGradients[i].size();
97 for (
size_t j = 0; j < weightsNSlices; j++) {
98 initialize<Architecture_t>(fPastWeightGradients[i][j], EInitialization::kZero);
101 Architecture_t::CreateWeightTensors( fPastBiasGradients[i], layers[i]->GetBiases());
102 size_t biasesNSlices = fPastBiasGradients[i].size();
103 for (
size_t j = 0; j < biasesNSlices; j++) {
104 initialize<Architecture_t>(fPastBiasGradients[i][j], EInitialization::kZero);
112 template <
typename Architecture_t,
typename Layer_t,
typename DeepNet_t>
113 auto TSGD<Architecture_t, Layer_t, DeepNet_t>::UpdateWeights(
size_t layerIndex, std::vector<Matrix_t> &weights,
114 const std::vector<Matrix_t> &weightGradients) ->
void
119 std::vector<Matrix_t> ¤tLayerPastWeightGradients = this->GetPastWeightGradientsAt(layerIndex);
121 for (
size_t k = 0; k < currentLayerPastWeightGradients.size(); k++) {
122 Architecture_t::ConstMult(currentLayerPastWeightGradients[k], this->GetMomentum());
123 Architecture_t::ScaleAdd(currentLayerPastWeightGradients[k], weightGradients[k], 1.0);
128 for (
size_t i = 0; i < weights.size(); i++) {
129 Architecture_t::ScaleAdd(weights[i], currentLayerPastWeightGradients[i], -this->GetLearningRate());
134 template <
typename Architecture_t,
typename Layer_t,
typename DeepNet_t>
135 auto TSGD<Architecture_t, Layer_t, DeepNet_t>::UpdateBiases(
size_t layerIndex, std::vector<Matrix_t> &biases,
136 const std::vector<Matrix_t> &biasGradients) ->
void
141 std::vector<Matrix_t> ¤tLayerPastBiasGradients = this->GetPastBiasGradientsAt(layerIndex);
143 for (
size_t k = 0; k < currentLayerPastBiasGradients.size(); k++) {
144 Architecture_t::ConstMult(currentLayerPastBiasGradients[k], this->GetMomentum());
145 Architecture_t::ScaleAdd(currentLayerPastBiasGradients[k], biasGradients[k], 1.0);
150 for (
size_t i = 0; i < biases.size(); i++) {
151 Architecture_t::ScaleAdd(biases[i], currentLayerPastBiasGradients[i], -this->GetLearningRate());