#ifndef TMVA_NEURAL_NET_I
#define TMVA_NEURAL_NET_I

#ifndef TMVA_NEURAL_NET
#error "Do not use NeuralNet.icc directly. #include \"NeuralNet.h\" instead."
#endif // TMVA_NEURAL_NET

#pragma GCC diagnostic ignored "-Wunused-variable"

#include <algorithm>
#include <future>
#include <random>
#include <thread>
#include <tuple>

namespace TMVA
{
namespace DNN
{

template <typename T>
T uniformFromTo (T from, T to)
{
    return from + (rand ()* (to - from)/RAND_MAX);
}

template <typename Container, typename T>
void uniformDouble (Container& container, T maxValue)
{
    for (auto it = begin (container), itEnd = end (container); it != itEnd; ++it)
    {
        (*it) = TMVA::DNN::uniformFromTo (-1.0*maxValue, 1.0*maxValue);
    }
}

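// Example (sketch, not part of the library): fill a weight vector with
// uniform random values in [-0.1, 0.1]. uniformFromTo() is based on the
// global rand(), so call srand() first if reproducibility matters.
//
//     std::vector<double> weights (20);
//     TMVA::DNN::uniformDouble (weights, 0.1);
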
extern std::shared_ptr<std::function<double(double)>> ZeroFnc;

extern std::shared_ptr<std::function<double(double)>> Sigmoid;
extern std::shared_ptr<std::function<double(double)>> InvSigmoid;

extern std::shared_ptr<std::function<double(double)>> Tanh;
extern std::shared_ptr<std::function<double(double)>> InvTanh;

extern std::shared_ptr<std::function<double(double)>> Linear;
extern std::shared_ptr<std::function<double(double)>> InvLinear;

extern std::shared_ptr<std::function<double(double)>> SymmReLU;
extern std::shared_ptr<std::function<double(double)>> InvSymmReLU;

extern std::shared_ptr<std::function<double(double)>> ReLU;
extern std::shared_ptr<std::function<double(double)>> InvReLU;

extern std::shared_ptr<std::function<double(double)>> SoftPlus;
extern std::shared_ptr<std::function<double(double)>> InvSoftPlus;

extern std::shared_ptr<std::function<double(double)>> TanhShift;
extern std::shared_ptr<std::function<double(double)>> InvTanhShift;

extern std::shared_ptr<std::function<double(double)>> SoftSign;
extern std::shared_ptr<std::function<double(double)>> InvSoftSign;

extern std::shared_ptr<std::function<double(double)>> Gauss;
extern std::shared_ptr<std::function<double(double)>> InvGauss;

extern std::shared_ptr<std::function<double(double)>> GaussComplement;
extern std::shared_ptr<std::function<double(double)>> InvGaussComplement;

/*! \brief apply the weights in forward direction, optionally masking
 *         dropped source nodes
 *
 * itDrop provides one flag per source node and is advanced only when
 * HasDropOut is true; for no drop-out, pass a pointer to a bool holding
 * "true" so that *itDrop is always set.
 */
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItTarget, typename ItDrop>
void applyWeights (ItSource itSourceBegin, ItSource itSourceEnd,
                   ItWeight itWeight,
                   ItTarget itTargetBegin, ItTarget itTargetEnd,
                   ItDrop itDrop)
{
    for (auto itSource = itSourceBegin; itSource != itSourceEnd; ++itSource)
    {
        for (auto itTarget = itTargetBegin; itTarget != itTargetEnd; ++itTarget)
        {
            if (!HasDropOut || *itDrop)
                (*itTarget) += (*itSource) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}

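// Example (sketch): forward accumulation of a 2-node layer from a 3-node
// layer without drop-out. Weights are laid out source-major (all weights
// of the first source node first); the drop-out iterator is a dummy here.
//
//     double source[] = {1.0, 2.0, 3.0};
//     double weight[] = {0.1, 0.2,  0.3, 0.4,  0.5, 0.6};
//     double target[] = {0.0, 0.0};
//     bool noDrop = true;
//     applyWeights<false> (source, source + 3, weight, target, target + 2, &noDrop);
//     // target[0] == 1*0.1 + 2*0.3 + 3*0.5 == 2.2
//     // target[1] == 1*0.2 + 2*0.4 + 3*0.6 == 2.8
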
/*! \brief apply the weights backwards (back-propagation of the deltas),
 *         optionally masking dropped nodes of the previous layer
 */
template <bool HasDropOut, typename ItSource, typename ItWeight, typename ItPrev, typename ItDrop>
void applyWeightsBackwards (ItSource itCurrBegin, ItSource itCurrEnd,
                            ItWeight itWeight,
                            ItPrev itPrevBegin, ItPrev itPrevEnd,
                            ItDrop itDrop)
{
    for (auto itPrev = itPrevBegin; itPrev != itPrevEnd; ++itPrev)
    {
        for (auto itCurr = itCurrBegin; itCurr != itCurrEnd; ++itCurr)
        {
            if (!HasDropOut || *itDrop)
                (*itPrev) += (*itCurr) * (*itWeight);
            ++itWeight;
        }
        if (HasDropOut) ++itDrop;
    }
}

/*! \brief apply the activation function to the node values
 */
template <typename ItValue, typename Fnc>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);
        ++itValue;
    }
}

/*! \brief apply the activation function and store the gradient
 *         (computed by the inverse function from the activated value)
 */
template <typename ItValue, typename Fnc, typename InvFnc, typename ItGradient>
void applyFunctions (ItValue itValue, ItValue itValueEnd, Fnc fnc, InvFnc invFnc, ItGradient itGradient)
{
    while (itValue != itValueEnd)
    {
        auto& value = (*itValue);
        value = (*fnc.get ()) (value);
        (*itGradient) = (*invFnc.get ()) (value);

        ++itValue; ++itGradient;
    }
}

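// Example (sketch): apply the Sigmoid activation in place while capturing
// the derivative (InvSigmoid evaluated on the activated value) for the
// back-propagation step.
//
//     std::vector<double> values = {-1.0, 0.0, 1.0};
//     std::vector<double> gradients (values.size ());
//     applyFunctions (begin (values), end (values), Sigmoid, InvSigmoid,
//                     begin (gradients));
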
/*! \brief accumulate the gradients for all weights between two layers
 */
template <typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient)
{
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient);
            ++itTargetDelta; ++itTargetGradient; ++itGradient;
        }
        ++itSource;
    }
}

/*! \brief compute the regularization term (none by default)
 */
template <EnumRegularization Regularization>
inline double computeRegularization (double weight, const double& factorWeightDecay)
{
    MATH_UNUSED(weight);
    MATH_UNUSED(factorWeightDecay);
    return 0;
}

// L1 regularization
template <>
inline double computeRegularization<EnumRegularization::L1> (double weight, const double& factorWeightDecay)
{
    return weight == 0.0 ? 0.0 : std::copysign (factorWeightDecay, weight);
}

// L2 regularization
template <>
inline double computeRegularization<EnumRegularization::L2> (double weight, const double& factorWeightDecay)
{
    return factorWeightDecay * weight;
}

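// Example (sketch): these terms correspond (up to constant factors) to the
// derivatives of the penalties added by weightDecay() further below,
// decay*|w| for L1 and decay*w^2 for L2. For w = -0.4 and decay = 0.01:
//
//     computeRegularization<EnumRegularization::L1> (-0.4, 0.01); // == -0.01
//     computeRegularization<EnumRegularization::L2> (-0.4, 0.01); // == -0.004
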
/*! \brief accumulate the gradients, including the regularization term
 */
template <EnumRegularization Regularization, typename ItSource, typename ItDelta, typename ItTargetGradient, typename ItGradient, typename ItWeight>
void update (ItSource itSource, ItSource itSourceEnd,
             ItDelta itTargetDeltaBegin, ItDelta itTargetDeltaEnd,
             ItTargetGradient itTargetGradientBegin,
             ItGradient itGradient,
             ItWeight itWeight,
             double weightDecay)
{
    // NOTE: "weightDecay" has to be scaled by 1/n already, where n is the number of weights
    while (itSource != itSourceEnd)
    {
        auto itTargetDelta = itTargetDeltaBegin;
        auto itTargetGradient = itTargetGradientBegin;
        while (itTargetDelta != itTargetDeltaEnd)
        {
            (*itGradient) -= (*itTargetDelta) * (*itSource) * (*itTargetGradient) + computeRegularization<Regularization>(*itWeight,weightDecay);
            ++itTargetDelta; ++itTargetGradient; ++itGradient; ++itWeight;
        }
        ++itSource;
    }
}

#define USELOCALWEIGHTS 1

/*! \brief steepest gradient descent with a Nesterov-style momentum term
 *
 * Executes m_repetitions gradient steps per call; the momentum (m_beta)
 * is applied to the local weights before each gradient computation.
 */
template <typename Function, typename Weights, typename PassThrough>
double Steepest::operator() (Function& fitnessFunction, Weights& weights, PassThrough& passThrough)
{
    size_t numWeights = weights.size ();
    m_localGradients.assign (numWeights, 0.0);
    m_localWeights.assign (begin (weights), end (weights));

    double E = 1e10;
    if (m_prevGradients.size () != numWeights)
    {
        m_prevGradients.clear ();
        m_prevGradients.assign (weights.size (), 0);
    }

    bool success = true;
    size_t currentRepetition = 0;
    while (success)
    {
        if (currentRepetition >= m_repetitions)
            break;

        m_localGradients.assign (numWeights, 0.0);

        // --- Nesterov momentum: apply the momentum term before computing the new gradient
        auto itPrevG = begin (m_prevGradients);
        auto itPrevGEnd = end (m_prevGradients);
        auto itLocWeight = begin (m_localWeights);
        for (; itPrevG != itPrevGEnd; ++itPrevG, ++itLocWeight)
        {
            (*itPrevG) *= m_beta;
            (*itLocWeight) += (*itPrevG);
        }

        E = fitnessFunction (passThrough, m_localWeights, m_localGradients);

        double alpha = gaussDouble (m_alpha, m_alpha/2.0);

        auto itG = begin (m_localGradients);
        auto itGEnd = end (m_localGradients);
        itPrevG = begin (m_prevGradients);
        double maxGrad = 0.0;
        for (; itG != itGEnd; ++itG, ++itPrevG)
        {
            double currGrad = (*itG);
            double prevGrad = (*itPrevG);
            currGrad *= alpha;

            currGrad += prevGrad;
            (*itG) = currGrad;
            (*itPrevG) = currGrad;

            if (std::fabs (currGrad) > maxGrad)
                maxGrad = std::fabs (currGrad);
        }

        if (maxGrad > 1)
        {
            // gradients blew up: halve the learning rate, rescale the weights
            // and restart the momentum
            m_alpha /= 2;
            std::cout << "\nlearning rate reduced to " << m_alpha << std::endl;
            std::for_each (weights.begin (), weights.end (), [maxGrad](double& w)
            {
                w /= maxGrad;
            });
            m_prevGradients.clear ();
        }
        else
        {
            // apply the local gradients to the weights
            auto itW = std::begin (weights);
            std::for_each (std::begin (m_localGradients), std::end (m_localGradients), [&itW](double& g)
            {
                *itW += g;
                ++itW;
            });
        }

        ++currentRepetition;
    }
    return E;
}

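// Example (sketch): one training step with the steepest-descent minimizer.
// The fitness function must have the shape
// double (PassThrough&, Weights&, Gradients&) and fill the gradients;
// Net::operator() (below) provides exactly this. The constructor arguments
// shown are the defaults declared in NeuralNet.h (assumption).
//
//     Steepest minimizer (/*learningRate=*/1e-4, /*momentum=*/0.3, /*repetitions=*/10);
//     double error = minimizer (net, weights, settingsAndBatch);
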
/*! \brief sum-of-squares error function; fills the deltas if provided
 */
template <typename ItOutput, typename ItTruth, typename ItDelta, typename InvFnc>
double sumOfSquares (ItOutput itOutputBegin, ItOutput itOutputEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, InvFnc invFnc, double patternWeight)
{
    double errorSum = 0.0;

    // output - truth
    ItTruth itTruth = itTruthBegin;
    bool hasDeltas = (itDelta != itDeltaEnd);
    for (ItOutput itOutput = itOutputBegin; itOutput != itOutputEnd; ++itOutput, ++itTruth)
    {
        double output = (*itOutput);
        double error = output - (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = (*invFnc.get ()) (output) * error * patternWeight;
            ++itDelta;
        }
        errorSum += error*error * patternWeight;
    }

    return errorSum;
}

/*! \brief cross-entropy error function (binary targets); fills the deltas if provided
 */
template <typename ItProbability, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double crossEntropy (ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    bool hasDeltas = (itDelta != itDeltaEnd);

    double errorSum = 0.0;
    // advance the truth iterator together with the probabilities
    for (ItProbability itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruthBegin)
    {
        double probability = *itProbability;
        double truth = *itTruthBegin;
        // clamp the target to 0.1/0.9 to keep the log terms away from saturation
        truth = truth < 0.5 ? 0.1 : 0.9;
        if (hasDeltas)
        {
            double delta = probability - truth;
            (*itDelta) = delta*patternWeight;
            ++itDelta;
        }
        double error (0);
        if (probability == 0) // protection against log (0)
        {
            if (truth >= 0.5)
                error += 1.0;
        }
        else if (probability == 1)
        {
            if (truth < 0.5)
                error += 1.0;
        }
        else
            error += - (truth * log (probability) + (1.0-truth) * log (1.0-probability)); // cross-entropy
        errorSum += error * patternWeight;
    }

    return errorSum;
}

/*! \brief soft-max cross-entropy error function (mutually exclusive classes)
 */
template <typename ItOutput, typename ItTruth, typename ItDelta, typename ItInvActFnc>
double softMaxCrossEntropy (ItOutput itProbabilityBegin, ItOutput itProbabilityEnd, ItTruth itTruthBegin, ItTruth /*itTruthEnd*/, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc /*itInvActFnc*/, double patternWeight)
{
    double errorSum = 0.0;

    bool hasDeltas = (itDelta != itDeltaEnd);
    // output - truth
    ItTruth itTruth = itTruthBegin;
    for (auto itProbability = itProbabilityBegin; itProbability != itProbabilityEnd; ++itProbability, ++itTruth)
    {
        double probability = (*itProbability);
        double truth = (*itTruth);
        if (hasDeltas)
        {
            (*itDelta) = probability - truth;
            ++itDelta;
        }
        double error (0);

        error += truth * log (probability);
        errorSum += error;
    }

    return -errorSum * patternWeight;
}

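// Example (sketch): the soft-max cross-entropy of one pattern with
// probabilities (0.7, 0.2, 0.1) and one-hot truth (1, 0, 0) is
// -log(0.7), about 0.357; the corresponding deltas are (p_i - t_i).
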
/*! \brief add the regularization penalty to the error
 */
template <typename ItWeight>
double weightDecay (double error, ItWeight itWeight, ItWeight itWeightEnd,
                    double factorWeightDecay, EnumRegularization eRegularization)
{
    if (eRegularization == EnumRegularization::L1)
    {
        // weight decay (regularization)
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += std::fabs (weight);
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else if (eRegularization == EnumRegularization::L2)
    {
        // weight decay (regularization)
        double w = 0;
        size_t n = 0;
        for (; itWeight != itWeightEnd; ++itWeight, ++n)
        {
            double weight = (*itWeight);
            w += weight*weight;
        }
        return error + 0.5 * w * factorWeightDecay / n;
    }
    else
        return error;
}

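// Example (sketch): an L2 penalty over three weights; the penalty added to
// the error is 0.5 * factorWeightDecay * mean(w^2).
//
//     double w[] = {0.5, -0.5, 1.0};
//     double e = weightDecay (/*error=*/1.0, w, w + 3,
//                             /*factorWeightDecay=*/0.1,
//                             EnumRegularization::L2);
//     // e == 1.0 + 0.5 * 0.1 * (0.25 + 0.25 + 1.0)/3 == 1.025
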
/*! \brief apply the weights in forward direction of the DNN
 */
template <typename LAYERDATA>
void forward (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeights<true> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                            currLayerData.weightsBegin (),
                            currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                            prevLayerData.dropOut ());
    }
    else
    {
        bool dummy = true;
        applyWeights<false> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                             currLayerData.weightsBegin (),
                             currLayerData.valuesBegin (), currLayerData.valuesEnd (),
                             &dummy); // dummy drop-out iterator; all nodes are kept
    }
}

/*! \brief backward application of the weights (back-propagation of the error)
 */
template <typename LAYERDATA>
void backward (LAYERDATA& prevLayerData, LAYERDATA& currLayerData)
{
    if (prevLayerData.hasDropOut ())
    {
        applyWeightsBackwards<true> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                     currLayerData.weightsBegin (),
                                     prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                     prevLayerData.dropOut ());
    }
    else
    {
        bool dummy = true;
        applyWeightsBackwards<false> (currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                      currLayerData.weightsBegin (),
                                      prevLayerData.deltasBegin (), prevLayerData.deltasEnd (),
                                      &dummy); // dummy drop-out iterator; all nodes are kept
    }
}

/*! \brief update the gradients of the weights between two layers
 */
template <typename LAYERDATA>
void update (const LAYERDATA& prevLayerData, LAYERDATA& currLayerData,
             double factorWeightDecay, EnumRegularization regularization)
{
    // NOTE: "factorWeightDecay" has to be scaled by 1/n already, where n is the number of weights
    if (factorWeightDecay != 0.0) // has weight regularization
    {
        if (regularization == EnumRegularization::L1) // L1 regularization ( sum(|w|) )
        {
            update<EnumRegularization::L1> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else if (regularization == EnumRegularization::L2) // L2 regularization ( sum(w^2) )
        {
            update<EnumRegularization::L2> (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                                            currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                                            currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin (),
                                            currLayerData.weightsBegin (), factorWeightDecay);
        }
        else
        {
            update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                    currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                    currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
        }
    }
    else
    {   // no weight regularization
        update (prevLayerData.valuesBegin (), prevLayerData.valuesEnd (),
                currLayerData.deltasBegin (), currLayerData.deltasEnd (),
                currLayerData.valueGradientsBegin (), currLayerData.gradientsBegin ());
    }
}

/*! \brief compute the drop-out weight factor
 *
 * When drop-out is used during training, the weights have to be rescaled
 * before the net is applied for evaluation (and scaled back, inverse = true,
 * before training continues).
 */
template <typename WeightsType, typename DropProbabilities>
void Net::dropOutWeightFactor (WeightsType& weights,
                               const DropProbabilities& drops,
                               bool inverse)
{
    if (drops.empty () || weights.empty ())
        return;

    auto itWeight = std::begin (weights);
    auto itWeightEnd = std::end (weights);
    auto itDrop = std::begin (drops);
    auto itDropEnd = std::end (drops);
    size_t numNodesPrev = inputSize ();
    double dropFractionPrev = *itDrop;
    ++itDrop;

    for (auto& layer : layers ())
    {
        if (itDrop == itDropEnd)
            break;

        size_t _numNodes = layer.numNodes ();

        double dropFraction = *itDrop;
        double pPrev = 1.0 - dropFractionPrev;
        double p = 1.0 - dropFraction;
        p *= pPrev;

        if (inverse)
        {
            p = 1.0/p;
        }
        size_t _numWeights = layer.numWeights (numNodesPrev);
        for (size_t iWeight = 0; iWeight < _numWeights; ++iWeight)
        {
            if (itWeight == itWeightEnd)
                break;

            *itWeight *= p;
            ++itWeight;
        }
        numNodesPrev = _numNodes;
        dropFractionPrev = dropFraction;
        ++itDrop;
    }
}

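// Example (sketch): with drop fractions {0.5, 0.3} (input layer, first
// hidden layer), the weights into the first hidden layer are scaled by
// (1 - 0.5) * (1 - 0.3) = 0.35 for evaluation; calling the function again
// with inverse = true multiplies by 1/0.35 and restores the training-time
// weights before the next cycle.
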
/*! \brief train the net with the given training and test patterns
 */
template <typename Minimizer>
double Net::train (std::vector<double>& weights,
                   std::vector<Pattern>& trainPattern,
                   const std::vector<Pattern>& testPattern,
                   Minimizer& minimizer,
                   Settings& settings)
{
    settings.startTrainCycle ();

    // JsMVA progress bar maximum (100%)
    if (fIPyMaxIter) *fIPyMaxIter = 100;

    settings.create ("trainErrors", 100, 0, 100, 100, 0,1);
    settings.create ("testErrors", 100, 0, 100, 100, 0,1);

    size_t cycleCount = 0;
    size_t testCycleCount = 0;
    double testError = 1e20;
    double trainError = 1e20;
    size_t dropOutChangeCount = 0;

    DropContainer dropContainer;
    DropContainer dropContainerTest;
    const std::vector<double>& dropFractions = settings.dropFractions ();
    bool isWeightsForDrop = false;

    // until convergence
    do
    {
        ++cycleCount;

        // if drop-out is enabled, refill the drop-out container periodically
        size_t dropIndex = 0;
        if (!dropFractions.empty () && dropOutChangeCount % settings.dropRepetitions () == 0)
        {
            // fill the drop-out container
            dropContainer.clear ();
            size_t _numNodes = inputSize ();
            double dropFraction = 0.0;
            dropFraction = dropFractions.at (dropIndex);
            ++dropIndex;
            fillDropContainer (dropContainer, dropFraction, _numNodes);
            for (auto itLayer = begin (m_layers), itLayerEnd = end (m_layers); itLayer != itLayerEnd; ++itLayer, ++dropIndex)
            {
                auto& layer = *itLayer;
                _numNodes = layer.numNodes ();

                // take the provided drop fraction if one is given for this layer
                dropFraction = 0.0;
                if (dropFractions.size () > dropIndex)
                    dropFraction = dropFractions.at (dropIndex);

                fillDropContainer (dropContainer, dropFraction, _numNodes);
            }
            isWeightsForDrop = true;
        }

        // execute training cycle
        trainError = trainCycle (minimizer, weights, begin (trainPattern), end (trainPattern), settings, dropContainer);

        // check if we have to execute a test
        bool hasConverged = false;
        if (testCycleCount % settings.testRepetitions () == 0)
        {
            if (isWeightsForDrop)
            {
                dropOutWeightFactor (weights, dropFractions);
                isWeightsForDrop = false;
            }

            testError = 0;
            settings.startTestCycle ();
            if (settings.useMultithreading ())
            {
                size_t numThreads = std::thread::hardware_concurrency ();
                size_t patternPerThread = testPattern.size () / numThreads;
                std::vector<Batch> batches;
                auto itPat = testPattern.begin ();
                // distribute the test patterns (all but the last thread get an equal share)
                for (size_t idxThread = 0; idxThread < numThreads-1; ++idxThread)
                {
                    batches.push_back (Batch (itPat, itPat + patternPerThread));
                    itPat += patternPerThread;
                }
                if (itPat != testPattern.end ())
                    batches.push_back (Batch (itPat, testPattern.end ()));

                // -------------- execute each of the batches on a different thread -------------
                std::vector<std::future<std::tuple<double,std::vector<double>>>> futures;
                for (auto& batch : batches)
                {
                    futures.push_back (
                        std::async (std::launch::async, [&]()
                        {
                            std::vector<double> localOutput;
                            pass_through_type passThrough (settings, batch, dropContainerTest);
                            double testBatchError = (*this) (passThrough, weights, ModeOutput::FETCH, localOutput);
                            return std::make_tuple (testBatchError, localOutput);
                        })
                        );
                }

                auto itBatch = batches.begin ();
                for (auto& f : futures)
                {
                    std::tuple<double,std::vector<double>> result = f.get ();
                    testError += std::get<0>(result) / batches.size ();
                    std::vector<double> output = std::get<1>(result);
                    if (output.size() == (outputSize() - 1) * itBatch->size())
                    {
                        auto output_iterator = output.begin();
                        for (auto pattern_it = itBatch->begin(); pattern_it != itBatch->end(); ++pattern_it)
                        {
                            for (size_t output_index = 1; output_index < outputSize(); ++output_index)
                            {
                                settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                                     (*pattern_it).weight ());
                                ++output_iterator;
                            }
                        }
                    }
                    ++itBatch;
                }
            }
            else
            {
                std::vector<double> output;
                // execute the test in one batch covering all test patterns
                Batch batch (begin (testPattern), end (testPattern));
                output.clear ();
                pass_through_type passThrough (settings, batch, dropContainerTest);
                double testPatternError = (*this) (passThrough, weights, ModeOutput::FETCH, output);
                if (output.size() == (outputSize() - 1) * batch.size())
                {
                    auto output_iterator = output.begin();
                    for (auto pattern_it = batch.begin(); pattern_it != batch.end(); ++pattern_it)
                    {
                        for (size_t output_index = 1; output_index < outputSize(); ++output_index)
                        {
                            settings.testSample (0, *output_iterator, (*pattern_it).output ().at (0),
                                                 (*pattern_it).weight ());
                            ++output_iterator;
                        }
                    }
                }
                testError += testPatternError;
            }
            settings.endTestCycle ();

            settings.computeResult (*this, weights);

            hasConverged = settings.hasConverged (testError);
            if (!hasConverged && !isWeightsForDrop)
            {
                dropOutWeightFactor (weights, dropFractions, true); // inverse
                isWeightsForDrop = true;
            }
        }
        ++testCycleCount;
        ++dropOutChangeCount;

        static double x = -1.0;
        x += 1.0;
        settings.addPoint ("trainErrors", cycleCount, trainError);
        settings.addPoint ("testErrors", cycleCount, testError);
        settings.plot ("trainErrors", "C", 1, kBlue);
        settings.plot ("testErrors", "C", 1, kMagenta);

        // setup output for JsMVA
        if (fInteractive){
            fInteractive->AddPoint(cycleCount, trainError, testError);
            if (*fExitFromTraining) break;
            *fIPyCurrentIter = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
        }

        if (hasConverged)
            break;

        if ((int)cycleCount % 10 == 0) {

            TString convText = Form( "(train/test/epo/conv/maxco): %.3g/%.3g/%d/%d/%d",
                                     trainError,
                                     testError,
                                     (int)cycleCount,
                                     (int)settings.convergenceCount (),
                                     (int)settings.maxConvergenceCount ());
            double progress = 100*(double)settings.maxConvergenceCount () /(double)settings.convergenceSteps ();
            settings.cycle (progress, convText);
        }
    }
    while (true);
    settings.endTrainCycle (trainError);

    TString convText = Form( "(train/test/epoch): %.4g/%.4g/%d", trainError, testError, (int)cycleCount);
    double progress = 100*(double)settings.maxConvergenceCount() /(double)settings.convergenceSteps ();
    settings.cycle (progress, convText);

    return testError;
}

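// Example (sketch): wiring up and training a small net. Class names are
// taken from NeuralNet.h; the layer sizes, activation choices, Settings
// parameters and the pattern containers (trainPatterns/testPatterns as
// std::vector<Pattern>) are illustrative assumptions, not fixed API.
//
//     Net net;
//     net.setInputSize (4);
//     net.setOutputSize (1);
//     net.addLayer (Layer (8, EnumFunction::TANH));
//     net.addLayer (Layer (1, EnumFunction::LINEAR, ModeOutputValues::SIGMOID));
//     net.setErrorFunction (ModeErrorFunction::CROSSENTROPY);
//
//     std::vector<double> weights;
//     net.initializeWeights (WeightInitializationStrategy::XAVIER,
//                            std::back_inserter (weights));
//     Steepest minimizer;                       // default learning rate/momentum
//     Settings settings (TString ("example"));  // default convergence/batch settings
//     net.train (weights, trainPatterns, testPatterns, minimizer, settings);
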
/*! \brief execute a single training cycle over all batches
 */
template <typename Iterator, typename Minimizer>
inline double Net::trainCycle (Minimizer& minimizer, std::vector<double>& weights,
                               Iterator itPatternBegin, Iterator itPatternEnd, Settings& settings, DropContainer& dropContainer)
{
    double error = 0.0;
    size_t numPattern = std::distance (itPatternBegin, itPatternEnd);
    size_t numBatches = numPattern/settings.batchSize ();
    size_t numBatches_stored = numBatches;

    std::shuffle(itPatternBegin, itPatternEnd, std::default_random_engine{});
    Iterator itPatternBatchBegin = itPatternBegin;
    Iterator itPatternBatchEnd = itPatternBatchBegin;

    // create the batches
    std::vector<Batch> batches;
    while (numBatches > 0)
    {
        std::advance (itPatternBatchEnd, settings.batchSize ());
        batches.push_back (Batch (itPatternBatchBegin, itPatternBatchEnd));
        itPatternBatchBegin = itPatternBatchEnd;
        --numBatches;
    }

    // add the remaining patterns to a last batch
    if (itPatternBatchEnd != itPatternEnd)
        batches.push_back (Batch (itPatternBatchEnd, itPatternEnd));

    if (settings.useMultithreading ())
    {
        // -------------------- divide the batches into bunches for the threads --------------
        size_t numThreads = std::thread::hardware_concurrency ();
        size_t batchesPerThread = batches.size () / numThreads;
        typedef std::vector<Batch>::iterator batch_iterator;
        std::vector<std::pair<batch_iterator,batch_iterator>> batchVec;
        batch_iterator itBatchBegin = std::begin (batches);
        batch_iterator itBatchCurrEnd = std::begin (batches);
        batch_iterator itBatchEnd = std::end (batches);
        for (size_t iT = 0; iT < numThreads; ++iT)
        {
            if (iT == numThreads-1)
                itBatchCurrEnd = itBatchEnd;
            else
                std::advance (itBatchCurrEnd, batchesPerThread);
            batchVec.push_back (std::make_pair (itBatchBegin, itBatchCurrEnd));
            itBatchBegin = itBatchCurrEnd;
        }

        // -------------------- execute each of the batch ranges on a different thread -------
        std::vector<std::future<double>> futures;
        for (auto& batchRange : batchVec)
        {
            futures.push_back (
                std::async (std::launch::async, [&]()
                {
                    double localError = 0.0;
                    for (auto it = batchRange.first, itEnd = batchRange.second; it != itEnd; ++it)
                    {
                        Batch& batch = *it;
                        pass_through_type settingsAndBatch (settings, batch, dropContainer);
                        Minimizer minimizerClone (minimizer); // each thread works on its own copy of the minimizer
                        localError += minimizerClone ((*this), weights, settingsAndBatch); // call the minimizer
                    }
                    return localError;
                })
                );
        }

        for (auto& f : futures)
            error += f.get ();
    }
    else
    {
        for (auto& batch : batches)
        {
            std::tuple<Settings&, Batch&, DropContainer&> settingsAndBatch (settings, batch, dropContainer);
            error += minimizer ((*this), weights, settingsAndBatch);
        }
    }

    numBatches_stored = std::max (numBatches_stored, size_t(1));
    error /= numBatches_stored;
    settings.testIteration ();

    return error;
}

/*! \brief compute the net for a single input vector
 */
template <typename Weights>
std::vector<double> Net::compute (const std::vector<double>& input, const Weights& weights) const
{
    std::vector<LayerData> layerData;
    layerData.reserve (m_layers.size ()+1);
    auto itWeight = begin (weights);
    auto itInputBegin = begin (input);
    auto itInputEnd = end (input);
    layerData.push_back (LayerData (itInputBegin, itInputEnd));
    size_t numNodesPrev = input.size ();

    // -------------------- prepare the layer data for this single pattern ----------------
    for (auto& layer: m_layers)
    {
        layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                        layer.activationFunction (),
                                        layer.modeOutputValues ()));
        size_t _numWeights = layer.numWeights (numNodesPrev);
        itWeight += _numWeights;
        numNodesPrev = layer.numNodes ();
    }

    // --------- forward -------------
    forwardPattern (m_layers, layerData);

    // ------------- fetch output ------------------
    std::vector<double> output;
    fetchOutput (layerData.back (), output);
    return output;
}

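// Example (sketch): evaluating a trained net on a single input vector;
// `weights` must hold exactly numWeights() values for this topology.
//
//     std::vector<double> input = {0.3, -1.2, 0.7, 0.0};
//     std::vector<double> response = net.compute (input, weights);
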
/*! \brief compute the error (forward propagation only, no training)
 */
template <typename Weights, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights) const
{
    std::vector<double> nothing; // empty gradients: no back-propagation is done, just forward
    assert (numWeights () == weights.size ());
    double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, nothing, false);
    return error;
}

/*! \brief compute the error and fetch the output (forward propagation only)
 */
template <typename Weights, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, const Weights& weights, ModeOutput /*eFetch*/, OutContainer& outputContainer) const
{
    std::vector<double> nothing; // empty gradients: no back-propagation is done, just forward
    assert (numWeights () == weights.size ());
    double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (nothing), std::end (nothing), 10000, outputContainer, true);
    return error;
}

/*! \brief compute the error and the gradients (full forward and backward propagation)
 */
template <typename Weights, typename Gradients, typename PassThrough>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients) const
{
    std::vector<double> nothing;
    assert (numWeights () == weights.size ());
    assert (weights.size () == gradients.size ());
    double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, nothing, false);
    return error;
}

/*! \brief compute the error and the gradients and fetch the output
 */
template <typename Weights, typename Gradients, typename PassThrough, typename OutContainer>
double Net::operator() (PassThrough& settingsAndBatch, Weights& weights, Gradients& gradients, ModeOutput eFetch, OutContainer& outputContainer) const
{
    MATH_UNUSED(eFetch);
    assert (numWeights () == weights.size ());
    assert (weights.size () == gradients.size ());
    double error = forward_backward(m_layers, settingsAndBatch, std::begin (weights), std::end (weights), std::begin (gradients), std::end (gradients), 0, outputContainer, true);
    return error;
}

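// Note on the four operator() overloads above: trainFromLayer = 10000 acts
// as a sentinel larger than any layer index, so forward_backward() skips
// the backward pass entirely, while trainFromLayer = 0 trains all layers;
// the ModeOutput/OutContainer variants additionally fetch the net output.
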
/*! \brief prepare the layer data for all patterns of the batch
 */
template <typename LayerContainer, typename DropContainer, typename ItWeight, typename ItGradient>
std::vector<std::vector<LayerData>> Net::prepareLayerData (LayerContainer& _layers,
                                                           Batch& batch,
                                                           const DropContainer& dropContainer,
                                                           ItWeight itWeightBegin,
                                                           ItWeight /*itWeightEnd*/,
                                                           ItGradient itGradientBegin,
                                                           ItGradient itGradientEnd,
                                                           size_t& totalNumWeights) const
{
    LayerData::const_dropout_iterator itDropOut;
    bool usesDropOut = !dropContainer.empty ();
    if (usesDropOut)
        itDropOut = std::begin (dropContainer);

    if (_layers.empty ())
        throw std::string ("no layers in this net");

    // ----------- create the layer data ---------------------------------------------------
    totalNumWeights = 0;
    size_t totalNumNodes = 0;
    std::vector<std::vector<LayerData>> layerPatternData;
    layerPatternData.reserve (_layers.size ()+1);
    ItWeight itWeight = itWeightBegin;
    ItGradient itGradient = itGradientBegin;
    size_t numNodesPrev = inputSize ();
    typename Pattern::const_iterator itInputBegin;
    typename Pattern::const_iterator itInputEnd;

    // ------------------- layer data for the input layer ----------------------------------
    layerPatternData.push_back (std::vector<LayerData>());
    for (const Pattern& _pattern : batch)
    {
        std::vector<LayerData>& layerData = layerPatternData.back ();
        layerData.push_back (LayerData (numNodesPrev));

        itInputBegin = _pattern.beginInput ();
        itInputEnd = _pattern.endInput ();
        layerData.back ().setInput (itInputBegin, itInputEnd);

        if (usesDropOut)
            layerData.back ().setDropOut (itDropOut);
    }

    if (usesDropOut)
        itDropOut += _layers.back ().numNodes ();

    // ------------------- layer data for the hidden and output layers ---------------------
    for (auto itLayer = begin (_layers), itLayerEnd = end (_layers); itLayer != itLayerEnd; ++itLayer)
    {
        bool isOutputLayer = (itLayer+1 == itLayerEnd);
        bool isFirstHiddenLayer = (itLayer == begin (_layers));

        auto& layer = *itLayer;
        layerPatternData.push_back (std::vector<LayerData>());
        // one LayerData entry per pattern
        for (const Pattern& _pattern : batch)
        {
            std::vector<LayerData>& layerData = layerPatternData.back ();

            if (itGradientBegin == itGradientEnd) // no gradients requested: forward only
            {
                layerData.push_back (LayerData (layer.numNodes (), itWeight,
                                                layer.activationFunction (),
                                                layer.modeOutputValues ()));
            }
            else // also prepare the gradient storage for training
            {
                layerData.push_back (LayerData (layer.numNodes (), itWeight, itGradient,
                                                layer.activationFunction (),
                                                layer.inverseActivationFunction (),
                                                layer.modeOutputValues ()));
            }

            if (usesDropOut)
            {
                layerData.back ().setDropOut (itDropOut);
            }
        }

        if (usesDropOut)
        {
            itDropOut += layer.numNodes ();
        }
        size_t _numWeights = layer.numWeights (numNodesPrev);
        totalNumWeights += _numWeights;
        itWeight += _numWeights;
        itGradient += _numWeights;
        numNodesPrev = layer.numNodes ();
        totalNumNodes += numNodesPrev;
    }
    assert (totalNumWeights > 0);
    return layerPatternData;
}

/*! \brief forward propagation of a single pattern
 */
template <typename LayerContainer>
void Net::forwardPattern (const LayerContainer& _layers,
                          std::vector<LayerData>& layerData) const
{
    size_t idxLayer = 0, idxLayerEnd = _layers.size ();
    size_t cumulativeNodeCount = 0;
    for (; idxLayer < idxLayerEnd; ++idxLayer)
    {
        LayerData& prevLayerData = layerData.at (idxLayer);
        LayerData& currLayerData = layerData.at (idxLayer+1);

        forward (prevLayerData, currLayerData);

        applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
    }
}

/*! \brief forward propagation of a whole batch, layer by layer
 */
template <typename LayerContainer, typename LayerPatternContainer>
void Net::forwardBatch (const LayerContainer& _layers,
                        LayerPatternContainer& layerPatternData,
                        std::vector<double>& valuesMean,
                        std::vector<double>& valuesStdDev,
                        size_t trainFromLayer) const
{
    valuesMean.clear ();
    valuesStdDev.clear ();

    // ----------- forward for all patterns, layer by layer --------------------------------
    size_t cumulativeNodeCount = 0;
    for (size_t idxLayer = 0, idxLayerEnd = layerPatternData.size (); idxLayer < idxLayerEnd-1; ++idxLayer)
    {
        bool doTraining = idxLayer >= trainFromLayer;

        // get the layer-pattern data of this and the next layer
        std::vector<LayerData>& prevLayerPatternData = layerPatternData.at (idxLayer);
        std::vector<LayerData>& currLayerPatternData = layerPatternData.at (idxLayer+1);

        size_t numPattern = prevLayerPatternData.size ();
        size_t numNodesLayer = _layers.at (idxLayer).numNodes ();

        std::vector<MeanVariance> means (numNodesLayer);
        // ---------- loop over the pattern data: compute the forward activations ----------
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            const LayerData& prevLayerData = prevLayerPatternData.at (idxPattern);
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);

            forward (prevLayerData, currLayerData); // feed forward
        }

        // ---------- loop over the pattern data: apply the non-linearities ----------------
        for (size_t idxPattern = 0; idxPattern < numPattern; ++idxPattern)
        {
            LayerData& currLayerData = currLayerPatternData.at (idxPattern);

            if (doTraining)
                applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction (),
                                currLayerData.inverseActivationFunction (), currLayerData.valueGradientsBegin ());
            else
                applyFunctions (currLayerData.valuesBegin (), currLayerData.valuesEnd (), currLayerData.activationFunction ());
        }

        // accumulate the node count
        cumulativeNodeCount += numNodesLayer;
    }
}

/*! \brief fetch the output values of the last layer
 */
template <typename OutputContainer>
void Net::fetchOutput (const LayerData& lastLayerData, OutputContainer& outputContainer) const
{
    ModeOutputValues eModeOutput = lastLayerData.outputMode ();
    if (isFlagSet (ModeOutputValues::DIRECT, eModeOutput))
    {
        outputContainer.insert (outputContainer.end (), lastLayerData.valuesBegin (), lastLayerData.valuesEnd ());
    }
    else if (isFlagSet (ModeOutputValues::SIGMOID, eModeOutput) ||
             isFlagSet (ModeOutputValues::SOFTMAX, eModeOutput))
    {
        const auto& prob = lastLayerData.probabilities ();
        outputContainer.insert (outputContainer.end (), prob.begin (), prob.end ());
    }
    else
        assert (false);
}

/*! \brief fetch the output values for all patterns of the batch
 */
template <typename OutputContainer>
void Net::fetchOutput (const std::vector<LayerData>& lastLayerPatternData, OutputContainer& outputContainer) const
{
    for (const LayerData& lastLayerData : lastLayerPatternData)
        fetchOutput (lastLayerData, outputContainer);
}

/*! \brief compute the error of the batch; returns (sum of errors, sum of pattern weights)
 */
template <typename ItWeight>
std::tuple<double,double> Net::computeError (const Settings& settings,
                                             std::vector<LayerData>& lastLayerData,
                                             Batch& batch,
                                             ItWeight itWeightBegin,
                                             ItWeight itWeightEnd) const
{
    typename std::vector<LayerData>::iterator itLayerData = lastLayerData.begin ();

    typename std::vector<Pattern>::const_iterator itPattern = batch.begin ();
    typename std::vector<Pattern>::const_iterator itPatternEnd = batch.end ();

    double sumWeights (0.0);
    double sumError (0.0);

    size_t idxPattern = 0;
    // loop over the patterns of this batch
    for ( ; itPattern != itPatternEnd; ++itPattern, ++itLayerData)
    {
        ++idxPattern;

        // compute the error for this pattern
        LayerData& layerData = (*itLayerData);
        const Pattern& _pattern = (*itPattern);
        double error = errorFunction (layerData, _pattern.output (),
                                      itWeightBegin, itWeightEnd,
                                      _pattern.weight (), settings.factorWeightDecay (),
                                      settings.regularization ());
        sumWeights += fabs (_pattern.weight ());
        sumError += error;
    }
    return std::make_tuple (sumError, sumWeights);
}

/*! \brief back-propagate the deltas and accumulate the gradients
 */
template <typename Settings>
void Net::backPropagate (std::vector<std::vector<LayerData>>& layerPatternData,
                         const Settings& settings,
                         size_t trainFromLayer,
                         size_t totalNumWeights) const
{
    bool doTraining = layerPatternData.size () > trainFromLayer;
    if (doTraining) // training of the net
    {
        // ------------- back propagation, from the last layer towards the first -----------
        size_t idxLayer = layerPatternData.size ();
        for (auto itLayerPatternData = layerPatternData.rbegin (), itLayerPatternDataBegin = layerPatternData.rend ();
             itLayerPatternData != itLayerPatternDataBegin; ++itLayerPatternData)
        {
            --idxLayer;
            if (idxLayer <= trainFromLayer) // no training below this layer
                break;

            std::vector<LayerData>& currLayerDataColl = *(itLayerPatternData);
            std::vector<LayerData>& prevLayerDataColl = *(itLayerPatternData+1);

            size_t idxPattern = 0;
            // walk through the pattern data of the current and the previous layer in lock-step
            for (typename std::vector<LayerData>::iterator itCurrLayerData = begin (currLayerDataColl), itCurrLayerDataEnd = end (currLayerDataColl),
                     itPrevLayerData = begin (prevLayerDataColl);
                 itCurrLayerData != itCurrLayerDataEnd; ++itCurrLayerData, ++itPrevLayerData, ++idxPattern)
            {
                LayerData& currLayerData = (*itCurrLayerData);
                LayerData& prevLayerData = *(itPrevLayerData);

                backward (prevLayerData, currLayerData);

                // the factorWeightDecay has to be scaled by 1/n, where n is the
                // number of weights (synapses)
                update (prevLayerData, currLayerData, settings.factorWeightDecay ()/totalNumWeights, settings.regularization ());
            }
        }
    }
}

/*! \brief forward pass, optional output fetching, error computation and backward pass
 */
template <typename LayerContainer, typename PassThrough, typename ItWeight, typename ItGradient, typename OutContainer>
double Net::forward_backward (LayerContainer& _layers, PassThrough& settingsAndBatch,
                              ItWeight itWeightBegin, ItWeight itWeightEnd,
                              ItGradient itGradientBegin, ItGradient itGradientEnd,
                              size_t trainFromLayer,
                              OutContainer& outputContainer, bool doFetchOutput) const
{
    Settings& settings = std::get<0>(settingsAndBatch);
    Batch& batch = std::get<1>(settingsAndBatch);
    DropContainer& dropContainer = std::get<2>(settingsAndBatch);

    double sumError = 0.0;
    double sumWeights = 0.0;

    // ----------------------------- prepare the layer data ---------------------------------
    size_t totalNumWeights (0);
    std::vector<std::vector<LayerData>> layerPatternData = prepareLayerData (_layers,
                                                                             batch,
                                                                             dropContainer,
                                                                             itWeightBegin,
                                                                             itWeightEnd,
                                                                             itGradientBegin,
                                                                             itGradientEnd,
                                                                             totalNumWeights);

    // ----------------------------- propagate forward --------------------------------------
    std::vector<double> valuesMean;
    std::vector<double> valuesStdDev;
    forwardBatch (_layers, layerPatternData, valuesMean, valuesStdDev, trainFromLayer);

    // ------------- fetch output ------------------
    if (doFetchOutput)
    {
        fetchOutput (layerPatternData.back (), outputContainer);
    }

    // ------------- error computation -------------
    std::tie (sumError, sumWeights) = computeError (settings, layerPatternData.back (), batch, itWeightBegin, itWeightBegin + totalNumWeights);

    // ------------- back propagation -------------
    backPropagate (layerPatternData, settings, trainFromLayer, totalNumWeights);

    // --- normalize the gradients by the batch size and the error by the pattern weights ---
    double batchSize = std::distance (std::begin (batch), std::end (batch));
    for (auto it = itGradientBegin; it != itGradientEnd; ++it)
        (*it) /= batchSize;

    sumError /= sumWeights;
    return sumError;
}

/*! \brief initialize the weights; the strategy defines how they are drawn
 */
template <typename OutIterator>
void Net::initializeWeights (WeightInitializationStrategy eInitStrategy, OutIterator itWeight)
{
    if (eInitStrategy == WeightInitializationStrategy::XAVIER)
    {
        // input and output properties
        int numInput = inputSize ();

        // ------ gaussian weights with stdDev = sqrt(2/nIn), layer by layer ------
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double stdDev = sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = DNN::gaussDouble (0.0, stdDev);
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::XAVIERUNIFORM)
    {
        // input and output properties
        int numInput = inputSize ();

        // ------ uniform weights in [-sqrt(2/nIn), sqrt(2/nIn)], layer by layer ------
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            double minVal = -sqrt(2.0/nIn);
            double maxVal = sqrt (2.0/nIn);
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = DNN::uniformDouble (minVal, maxVal); // scalar overload declared in NeuralNet.h
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::TEST)
    {
        // input and output properties
        int numInput = inputSize ();

        // ------ narrow gaussian weights (for tests) ------
        for (auto& layer: layers ())
        {
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = DNN::gaussDouble (0.0, 0.1);
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }

    if (eInitStrategy == WeightInitializationStrategy::LAYERSIZE)
    {
        // input and output properties
        int numInput = inputSize ();

        // ------ gaussian weights scaled by the layer size ------
        for (auto& layer: layers ())
        {
            double nIn = numInput;
            for (size_t iWeight = 0, iWeightEnd = layer.numWeights (numInput); iWeight < iWeightEnd; ++iWeight)
            {
                (*itWeight) = DNN::gaussDouble (0.0, sqrt (layer.numWeights (nIn)));
                ++itWeight;
            }
            numInput = layer.numNodes ();
        }
        return;
    }
}

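// Example (sketch): creating the initial weight vector for a net via a
// back-inserter, here with the XAVIER strategy (gaussian, sigma = sqrt(2/nIn)).
//
//     std::vector<double> weights;
//     net.initializeWeights (WeightInitializationStrategy::XAVIER,
//                            std::back_inserter (weights));
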
/*! \brief compute the error function for one pattern
 */
template <typename Container, typename ItWeight>
double Net::errorFunction (LayerData& layerData,
                           Container truth,
                           ItWeight itWeight,
                           ItWeight itWeightEnd,
                           double patternWeight,
                           double factorWeightDecay,
                           EnumRegularization eRegularization) const
{
    double error (0);
    switch (m_eErrorFunction)
    {
    case ModeErrorFunction::SUMOFSQUARES:
    {
        error = sumOfSquares (layerData.valuesBegin (), layerData.valuesEnd (), begin (truth), end (truth),
                              layerData.deltasBegin (), layerData.deltasEnd (),
                              layerData.inverseActivationFunction (),
                              patternWeight);
        break;
    }
    case ModeErrorFunction::CROSSENTROPY:
    {
        assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
        std::vector<double> probabilities = layerData.probabilities ();
        error = crossEntropy (begin (probabilities), end (probabilities),
                              begin (truth), end (truth),
                              layerData.deltasBegin (), layerData.deltasEnd (),
                              layerData.inverseActivationFunction (),
                              patternWeight);
        break;
    }
    case ModeErrorFunction::CROSSENTROPY_MUTUALEXCLUSIVE:
    {
        std::cout << "softmax." << std::endl;
        assert (!TMVA::DNN::isFlagSet (ModeOutputValues::DIRECT, layerData.outputMode ()));
        std::vector<double> probabilities = layerData.probabilities ();
        error = softMaxCrossEntropy (begin (probabilities), end (probabilities),
                                     begin (truth), end (truth),
                                     layerData.deltasBegin (), layerData.deltasEnd (),
                                     layerData.inverseActivationFunction (),
                                     patternWeight);
        break;
    }
    }
    if (factorWeightDecay != 0 && eRegularization != EnumRegularization::NONE)
    {
        error = weightDecay (error, itWeight, itWeightEnd, factorWeightDecay, eRegularization);
    }
    return error;
}

} // namespace DNN
} // namespace TMVA

#endif // TMVA_NEURAL_NET_I