template<typename Architecture_t,
         typename Layer_t = TLayer<Architecture_t>>
class TNet
{
public:
   using Matrix_t        = typename Architecture_t::Matrix_t;
   using Scalar_t        = typename Architecture_t::Scalar_t;
   using LayerIterator_t = typename std::vector<Layer_t>::iterator;

private:
   size_t fBatchSize;            ///< Batch size for training and evaluation of the network.
   size_t fInputWidth;           ///< Number of features in a single input event.
   std::vector<Layer_t> fLayers; ///< Layers in the network.
   Matrix_t fDummy;              ///< Empty matrix for the last step in backpropagation.
   ELossFunction fJ;             ///< The loss function of the network.
   ERegularization fR;           ///< The regularization used for the network.
   Scalar_t fWeightDecay;        ///< The weight decay factor.

public:
   TNet();
   TNet(const TNet & other);
   template<typename OtherArchitecture_t>
   TNet(size_t batchSize, const TNet<OtherArchitecture_t> &);
   TNet(size_t batchSize,
        size_t inputWidth,
        ELossFunction fJ,
        ERegularization fR = ERegularization::kNone,
        Scalar_t fWeightDecay = 0.0);
   /*! Create a clone that uses the same weight and bias matrices, but
    *  potentially a different batch size. */
   TNet<Architecture_t, TSharedLayer<Architecture_t>> CreateClone(size_t batchSize);
   /*! Append a fully connected layer of the given width to the network. */
   void AddLayer(size_t width, EActivationFunction f,
                 Scalar_t dropoutProbability = 1.0);

   /*! Remove all layers from the network. */
   void Clear();

   /*! Add a layer that shares its weights and biases with a layer of
    *  another network. */
   template <typename SharedLayer>
   void AddLayer(SharedLayer & layer);
   /*! Iterator to the first layer of the net. */
   LayerIterator_t LayersBegin() {return fLayers.begin();}

   /*! Iterator past the last layer of the net. */
   LayerIterator_t LayersEnd() {return fLayers.end();}
   inline void Initialize(EInitialization m);
   inline void InitializeGradients();
   inline void Forward(Matrix_t &X, bool applyDropout = false);
   inline void Backward(const Matrix_t &X, const Matrix_t &Y,
                        const Matrix_t &weights);
   inline Scalar_t Loss(const Matrix_t &Y, const Matrix_t &weights,
                        bool includeRegularization = true) const;
   inline Scalar_t Loss(Matrix_t &X, const Matrix_t &Y, const Matrix_t &weights,
                        bool applyDropout = false,
                        bool includeRegularization = true);
   inline void Prediction(Matrix_t &Y_hat, Matrix_t &X, EOutputFunction f);
   inline void Prediction(Matrix_t &Y_hat, EOutputFunction f) const;
   Scalar_t GetNFlops();
   size_t GetDepth() const                   {return fLayers.size();}
   size_t GetBatchSize() const               {return fBatchSize;}
   Layer_t & GetLayer(size_t i)              {return fLayers[i];}
   const Layer_t & GetLayer(size_t i) const  {return fLayers[i];}
   ELossFunction GetLossFunction() const     {return fJ;}
   Matrix_t & GetOutput()                    {return fLayers.back().GetOutput();}
   size_t GetInputWidth() const              {return fInputWidth;}
   size_t GetOutputWidth() const             {return fLayers.back().GetWidth();}
   ERegularization GetRegularization() const {return fR;}
   Scalar_t GetWeightDecay() const           {return fWeightDecay;}
   void SetBatchSize(size_t batchSize)       {fBatchSize = batchSize;}
   void SetInputWidth(size_t inputWidth)     {fInputWidth = inputWidth;}
   void SetRegularization(ERegularization R) {fR = R;}
   void SetLossFunction(ELossFunction J)     {fJ = J;}
   void SetWeightDecay(Scalar_t weightDecay) {fWeightDecay = weightDecay;}
   void SetDropoutProbabilities(const std::vector<Double_t> & probabilities);

   void Print();
};
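/* Usage sketch (an illustrative addition, not part of the original header).
 * Assuming the reference architecture backend TReference<double> from
 * TMVA/DNN/Architectures/Reference.h, a small regression net could be
 * assembled roughly as follows:
 *
 *    TNet<TReference<double>> net(32, 16, ELossFunction::kMeanSquaredError);
 *    net.AddLayer(64, EActivationFunction::kRelu);
 *    net.AddLayer(1,  EActivationFunction::kIdentity);
 *    net.Initialize(EInitialization::kGauss);
 *    // For a batch X with labels Y and event weights W:
 *    // net.Forward(X); net.Backward(X, Y, W);
 */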
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet()
    : fBatchSize(0), fInputWidth(0), fLayers(), fDummy(0,0),
      fJ(ELossFunction::kMeanSquaredError), fR(ERegularization::kNone),
      fWeightDecay(0.0)
{
   // Nothing to do here.
}
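/*! Copy constructor. Performs a deep copy: the layer vector and thus all
 *  weight and bias matrices are duplicated, not shared. */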
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(const TNet & other)
    : fBatchSize(other.fBatchSize), fInputWidth(other.fInputWidth),
      fLayers(other.fLayers), fDummy(0,0), fJ(other.fJ), fR(other.fR),
      fWeightDecay(other.fWeightDecay)
{
   // Nothing to do here.
}
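/*! Construct a network that replicates the layout of a network implemented on
 *  a (possibly different) architecture, copying its weights and biases. This
 *  allows, for example, transferring a net between two backends. */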
template<typename Architecture_t, typename Layer_t>
template<typename OtherArchitecture_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    const TNet<OtherArchitecture_t> & other)
    : fBatchSize(batchSize), fInputWidth(other.GetInputWidth()), fLayers(),
      fDummy(0,0), fJ(other.GetLossFunction()), fR(other.GetRegularization()),
      fWeightDecay(other.GetWeightDecay())
{
   fLayers.reserve(other.GetDepth());
   for (size_t i = 0; i < other.GetDepth(); i++) {
      AddLayer(other.GetLayer(i).GetWidth(),
               other.GetLayer(i).GetActivationFunction(),
               other.GetLayer(i).GetDropoutProbability());
      // Copy weights and biases through the architecture-independent
      // TMatrixT representation.
      fLayers[i].GetWeights() = (TMatrixT<Scalar_t>) other.GetLayer(i).GetWeights();
      fLayers[i].GetBiases()  = (TMatrixT<Scalar_t>) other.GetLayer(i).GetBiases();
   }
}
template<typename Architecture_t, typename Layer_t>
TNet<Architecture_t, Layer_t>::TNet(size_t batchSize,
                                    size_t inputWidth,
                                    ELossFunction J,
                                    ERegularization R,
                                    Scalar_t weightDecay)
    : fBatchSize(batchSize), fInputWidth(inputWidth), fLayers(), fDummy(0,0),
      fJ(J), fR(R), fWeightDecay(weightDecay)
{
   // Nothing to do here.
}
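/*! Create a clone of this network with the given batch size. The clone is
 *  built from TSharedLayer objects, so it references the weight and bias
 *  matrices of this network instead of owning copies. */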
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::CreateClone(size_t batchSize)
    -> TNet<Architecture_t, TSharedLayer<Architecture_t>>
{
   TNet<Architecture_t, TSharedLayer<Architecture_t>> other(batchSize, fInputWidth,
                                                            fJ, fR, fWeightDecay);
   for (auto &l : fLayers) {
      other.AddLayer(l);
   }
   return other;
}
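/*! Append a layer of the given width. The first layer is connected to the
 *  input, so its input width is fInputWidth; every subsequent layer takes
 *  the width of the previous layer as its input width. */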
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::AddLayer(size_t width,
                                             EActivationFunction f,
                                             Scalar_t dropoutProbability)
{
   if (fLayers.size() == 0) {
      fLayers.emplace_back(fBatchSize, fInputWidth, width, f, dropoutProbability);
   } else {
      size_t prevWidth = fLayers.back().GetWidth();
      fLayers.emplace_back(fBatchSize, prevWidth, width, f, dropoutProbability);
   }
}
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::Clear()
{
   fLayers.clear();
}
template<typename Architecture_t, typename Layer_t>
template<typename SharedLayer_t>
inline void TNet<Architecture_t, Layer_t>::AddLayer(SharedLayer_t & layer)
{
   fLayers.emplace_back(fBatchSize, layer);
}
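/*! Initialize the weights and biases of each layer according to the
 *  initialization method m. */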
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Initialize(EInitialization m)
{
   for (auto &l : fLayers) {
      l.Initialize(m);
   }
}
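/*! Set all weight and bias gradients to zero. Useful when the net is used to
 *  store velocities for momentum-based minimization techniques. */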
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::InitializeGradients()
{
   for (auto &l : fLayers) {
      initialize<Architecture_t>(l.GetWeightGradients(), EInitialization::kZero);
      initialize<Architecture_t>(l.GetBiasGradients(),   EInitialization::kZero);
   }
}
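/*! Propagate the input batch through the net, layer by layer; each layer
 *  consumes the output of its predecessor. Dropout is applied only if
 *  applyDropout is set, i.e. during training. */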
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Forward(Matrix_t &input,
                                                   bool applyDropout)
{
   fLayers.front().Forward(input, applyDropout);
   for (size_t i = 1; i < fLayers.size(); i++) {
      fLayers[i].Forward(fLayers[i-1].GetOutput(), applyDropout);
   }
}
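/*! Backpropagation: first evaluate the gradients of the loss with respect to
 *  the output activations, then walk the layers from back to front, each layer
 *  computing its weight and bias gradients plus the gradients needed by its
 *  predecessor. */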
template <typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Backward(const Matrix_t &X,
                                                    const Matrix_t &Y,
                                                    const Matrix_t &weights)
{
   evaluateGradients<Architecture_t>(fLayers.back().GetActivationGradients(),
                                     fJ, Y, fLayers.back().GetOutput(), weights);

   for (size_t i = fLayers.size()-1; i > 0; i--) {
      auto & activation_gradient_backward = fLayers[i-1].GetActivationGradients();
      auto & activations_backward         = fLayers[i-1].GetOutput();
      fLayers[i].Backward(activation_gradient_backward,
                          activations_backward, fR, fWeightDecay);
   }
   // The first layer consumes the input batch; fDummy absorbs the
   // (unneeded) gradients with respect to the input.
   fLayers[0].Backward(fDummy, X, fR, fWeightDecay);
}
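/*! Evaluate the loss for the activations currently stored in the output
 *  layer. If regularization is enabled, each layer contributes
 *  fWeightDecay * R(W_l) to the total loss. */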
template <typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(const Matrix_t &Y,
                                                const Matrix_t &weights,
                                                bool includeRegularization) const
    -> Scalar_t
{
   auto loss = evaluate<Architecture_t>(fJ, Y, fLayers.back().GetOutput(), weights);
   includeRegularization &= (fR != ERegularization::kNone);
   if (includeRegularization) {
      for (auto &l : fLayers) {
         loss += fWeightDecay * regularization<Architecture_t>(l.GetWeights(), fR);
      }
   }
   return loss;
}
template <typename Architecture_t, typename Layer_t>
inline auto TNet<Architecture_t, Layer_t>::Loss(Matrix_t &X,
                                                const Matrix_t &Y,
                                                const Matrix_t &weights,
                                                bool applyDropout,
                                                bool includeRegularization)
    -> Scalar_t
{
   Forward(X, applyDropout);
   return Loss(Y, weights, includeRegularization);
}
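/*! Forward the batch X through the net and apply the output function f to
 *  the activations of the last layer to obtain the prediction Y_hat. */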
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Yhat,
                                                      Matrix_t &X,
                                                      EOutputFunction f)
{
   Forward(X, false);
   evaluate<Architecture_t>(Yhat, f, fLayers.back().GetOutput());
}
template<typename Architecture_t, typename Layer_t>
inline void TNet<Architecture_t, Layer_t>::Prediction(Matrix_t &Y_hat,
                                                      EOutputFunction f) const
{
   evaluate<Architecture_t>(Y_hat, f, fLayers.back().GetOutput());
}
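/*! Estimate the number of floating point operations for one forward and
 *  backward pass of a batch. The counts use the standard dense matrix
 *  product cost: multiplying an (m x k) by a (k x n) matrix takes
 *  m * n * (2k - 1) flops, i.e. k multiplications and k - 1 additions per
 *  output element. */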
template<typename Architecture_t, typename Layer_t>
auto TNet<Architecture_t, Layer_t>::GetNFlops()
    -> Scalar_t
{
   Scalar_t flops = 0;

   Scalar_t nb  = (Scalar_t) fBatchSize;
   Scalar_t nlp = (Scalar_t) fInputWidth;

   for (size_t i = 0; i < fLayers.size(); i++) {
      Layer_t & layer = fLayers[i];
      Scalar_t nl = (Scalar_t) layer.GetWidth();

      // Forward propagation.
      flops += nb * nl * (2.0 * nlp - 1); // Weight matrix multiplication.
      flops += 2 * nb * nl;               // Bias addition and activation function.

      // Backward propagation.
      flops += nlp * nl * (2.0 * nb - 1.0); // Weight gradients.
      flops += nl * (nb - 1);               // Bias gradients.
      if (i > 0) {
         flops += nlp * nb * (2.0 * nl - 1.0); // Gradients w.r.t. previous activations.
      }
      nlp = nl;
   }
   return flops;
}
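/*! Set the dropout (keep) probability for each layer. Layers beyond the end
 *  of the probabilities vector fall back to 1.0, i.e. no dropout. */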
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::SetDropoutProbabilities(
    const std::vector<Double_t> & probabilities)
{
   for (size_t i = 0; i < fLayers.size(); i++) {
      if (i < probabilities.size()) {
         fLayers[i].SetDropoutProbability(probabilities[i]);
      } else {
         fLayers[i].SetDropoutProbability(1.0);
      }
   }
}
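/*! Print a textual summary of the network (loss function, depth, and each
 *  layer in order) to std::cout. */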
template<typename Architecture_t, typename Layer_t>
void TNet<Architecture_t, Layer_t>::Print()
{
   std::cout << "DEEP NEURAL NETWORK:";
   std::cout << " Loss function = " << static_cast<char>(fJ);
   std::cout << ", Depth = " << fLayers.size() << std::endl;

   size_t i = 1;
   for (auto & l : fLayers) {
      std::cout << "DNN Layer " << i << ":" << std::endl;
      l.Print();
      i++;
   }
}