// Gaussian activation: exp(-(s*x)^2) with fixed sharpness s = 6.
std::shared_ptr<std::function<double(double)>> Gauss = std::make_shared<std::function<double(double)>> ([](double value){
                     const double s = 6.0;
                     const double vs = value * s;
                     // vs*vs instead of std::pow(vs, 2.0): identical result, much cheaper
                     return std::exp (-vs * vs); });
// Complement of the Gaussian activation: 1 - exp(-(s*x)^2), s = 6.
std::shared_ptr<std::function<double(double)>> GaussComplement = std::make_shared<std::function<double(double)>> ([](double value){
                     const double s = 6.0;
                     const double vs = value * s;
                     // vs*vs instead of std::pow(vs, 2.0): identical result, much cheaper
                     return 1.0 - std::exp (-vs * vs); });
14 std::shared_ptr<std::function<double(double)>> InvGauss = std::make_shared<std::function<double(double)>> ([](
double value){
const double s = 6.0;
return -2.0 * value * s*s * (*Gauss.get ()) (value); });
15 std::shared_ptr<std::function<double(double)>> InvGaussComplement = std::make_shared<std::function<double(double)>> ([](
double value){
const double s = 6.0;
return +2.0 * value * s*s * (*GaussComplement.get ()) (value); });
// Derivative of the identity activation: constant 1 everywhere.
std::shared_ptr<std::function<double(double)>> InvLinear = std::make_shared<std::function<double(double)>> ([](double /*unused*/){
                     return 1.0; });
// Derivative of ReLU: 1 above the margin (0 here), 0 otherwise.
std::shared_ptr<std::function<double(double)>> InvReLU = std::make_shared<std::function<double(double)>> ([](double in){
                     const double margin = 0.0;
                     if (in > margin)
                        return 1.0;
                     return 0.0; });
18 std::shared_ptr<std::function<double(double)>> InvSigmoid = std::make_shared<std::function<double(double)>> ([](
double value){
double s = (*Sigmoid.get ()) (value);
return s*(1.0-s); });
// Derivative of softplus log(1+e^x) — the logistic function.
std::shared_ptr<std::function<double(double)>> InvSoftPlus = std::make_shared<std::function<double(double)>> ([](double x){
                     const double e = std::exp (-x);
                     return 1.0 / (1.0 + e); });
// Derivative of softsign x/(1+|x|): (1 - |y|)^2 expressed via the output y.
std::shared_ptr<std::function<double(double)>> InvSoftSign = std::make_shared<std::function<double(double)>> ([](double value){
                     // t*t instead of std::pow(t, 2.0); std::fabs instead of unqualified fabs
                     const double t = 1.0 - std::fabs (value);
                     return t*t; });
// Derivative of the symmetric ReLU: slope 1 outside [-margin, margin], 0 inside.
std::shared_ptr<std::function<double(double)>> InvSymmReLU = std::make_shared<std::function<double(double)>> ([](double v){
                     const double margin = 0.3;
                     return (v > margin || v < -margin) ? 1.0 : 0.0; });
// Derivative of tanh expressed in terms of the activation output y: 1 - y^2.
std::shared_ptr<std::function<double(double)>> InvTanh = std::make_shared<std::function<double(double)>> ([](double value){
                     // value*value instead of std::pow(value, 2.0): same result, much cheaper
                     return 1.0 - value*value; });
// Derivative used for the shifted-tanh activation: 0.3 + (1 - y^2).
std::shared_ptr<std::function<double(double)>> InvTanhShift = std::make_shared<std::function<double(double)>> ([](double value){
                     // value*value instead of std::pow(value, 2.0): same result, much cheaper
                     return 0.3 + (1.0 - value*value); });
// Identity activation.
std::shared_ptr<std::function<double(double)>> Linear = std::make_shared<std::function<double(double)>> ([](double x){
                     return x; });
// Rectified linear unit with margin 0: max(0, x).
std::shared_ptr<std::function<double(double)>> ReLU = std::make_shared<std::function<double(double)>> ([](double x){
                     const double margin = 0.0;
                     if (x > margin)
                        return x - margin;
                     return 0.0; });
// Logistic sigmoid; the argument is clamped to [-100, 100] so std::exp cannot overflow.
std::shared_ptr<std::function<double(double)>> Sigmoid = std::make_shared<std::function<double(double)>> ([](double x){
                     if (x > 100.0)  x = 100.0;
                     if (x < -100.0) x = -100.0;
                     return 1.0/(1.0 + std::exp (-x)); });
// Softplus activation log(1 + e^x), written in the numerically stable form:
// for large x the naive std::exp(x) overflows to inf; using
// x + log1p(e^-x) keeps the result finite and accurate (softplus(x) -> x).
std::shared_ptr<std::function<double(double)>> SoftPlus = std::make_shared<std::function<double(double)>> ([](double value){
                     if (value > 0.0)
                        return value + std::log1p (std::exp (-value));
                     return std::log1p (std::exp (value)); });
// Constant-zero activation.
std::shared_ptr<std::function<double(double)>> ZeroFnc = std::make_shared<std::function<double(double)>> ([](double /*unused*/){
                     return 0.0; });
// Hyperbolic-tangent activation.
std::shared_ptr<std::function<double(double)>> Tanh = std::make_shared<std::function<double(double)>> ([](double value){
                     // qualify with std:: for consistency with the other activations
                     return std::tanh (value); });
// Symmetric ReLU: zero inside [-margin, margin], linear (shifted by the margin) outside.
std::shared_ptr<std::function<double(double)>> SymmReLU = std::make_shared<std::function<double(double)>> ([](double x){
                     const double margin = 0.3;
                     if (x > margin)
                        return x - margin;
                     if (x < -margin)
                        return x + margin;
                     return 0.0; });
// Shifted tanh activation: tanh(x - 0.3).
std::shared_ptr<std::function<double(double)>> TanhShift = std::make_shared<std::function<double(double)>> ([](double value){
                     // qualify with std:: for consistency with the other activations
                     return std::tanh (value-0.3); });
// Softsign activation: x / (1 + |x|), a cheap sigmoid-like squashing function.
std::shared_ptr<std::function<double(double)>> SoftSign = std::make_shared<std::function<double(double)>> ([](double value){
                     // std::fabs instead of unqualified fabs
                     return value / (1.0 + std::fabs (value)); });
//______________________________________________________________________________
double gaussDouble (double mean, double sigma)
{
   // draw one normally distributed random number with the given mean and
   // width; the engine is static so the sequence continues across calls
   static std::default_random_engine engine;
   std::normal_distribution<double> gauss (mean, sigma);
   return gauss (engine);
}
//______________________________________________________________________________
double uniformDouble (double minValue, double maxValue)
{
   // draw one random number uniformly from [minValue, maxValue); the engine
   // is static so the sequence continues across calls
   static std::default_random_engine engine;
   std::uniform_real_distribution<double> uniform (minValue, maxValue);
   return uniform (engine);
}
//______________________________________________________________________________
int randomInt (int maxValue)
{
   // draw one random integer uniformly from [0, maxValue-1]; the engine is
   // static so the sequence continues across calls
   static std::default_random_engine engine;
   std::uniform_int_distribution<int> uniform (0, maxValue-1);
   return uniform (engine);
}
//______________________________________________________________________________
double studenttDouble (double distributionParameter)
{
   // draw one random number from a Student-t distribution with the given
   // number of degrees of freedom; the engine is static across calls
   static std::default_random_engine engine;
   std::student_t_distribution<double> studentT (distributionParameter);
   return studentT (engine);
}
// Construct the LayerData for the input layer of the net: no weights, no
// gradients, output passed through directly.
// NOTE(review): the original-line numbering jumps 73 -> 76, so the extraction
// appears to have dropped lines here (likely the m_size initialization and
// braces); m_deltas is sized from m_size, which must be set from inputSize.
68 LayerData::LayerData (
size_t inputSize)
69 : m_hasDropOut (false)
70 , m_isInputLayer (true)
71 , m_hasWeights (false)
72 , m_hasGradients (false)
73 , m_eModeOutput (ModeOutputValues::DIRECT)
76 m_deltas.assign (m_size, 0);
// Construct the input-layer LayerData from an iterator range over the input
// values; the layer size is deduced from the range, deltas are zero-filled.
81 LayerData::LayerData (const_iterator_type itInputBegin, const_iterator_type itInputEnd, ModeOutputValues eModeOutput)
82 : m_hasDropOut (false)
83 , m_isInputLayer (true)
84 , m_hasWeights (false)
85 , m_hasGradients (false)
86 , m_eModeOutput (eModeOutput)
// store the range and derive the node count from its length
88 m_itInputBegin = itInputBegin;
89 m_itInputEnd = itInputEnd;
90 m_size = std::distance (itInputBegin, itInputEnd);
91 m_deltas.assign (m_size, 0);
// Construct a hidden/output-layer LayerData for TRAINING: it references
// weights (const) and gradients (mutable) held by the Net, and carries the
// activation function and its derivative for back-propagation.
// NOTE(review): the numbering jumps 102 -> 104, so the extraction appears to
// have dropped the first init-list entry (likely m_size) and the braces.
97 LayerData::LayerData (
size_t _size,
98 const_iterator_type itWeightBegin,
99 iterator_type itGradientBegin,
100 std::shared_ptr<std::function<
double(
double)>> _activationFunction,
101 std::shared_ptr<std::function<
double(
double)>> _inverseActivationFunction,
102 ModeOutputValues eModeOutput)
104 , m_hasDropOut (false)
105 , m_itConstWeightBegin (itWeightBegin)
106 , m_itGradientBegin (itGradientBegin)
107 , m_activationFunction (_activationFunction)
108 , m_inverseActivationFunction (_inverseActivationFunction)
109 , m_isInputLayer (false)
110 , m_hasWeights (true)
111 , m_hasGradients (true)
112 , m_eModeOutput (eModeOutput)
// zero-fill the per-node buffers used during forward/backward propagation
114 m_values.assign (_size, 0);
115 m_deltas.assign (_size, 0);
116 m_valueGradients.assign (_size, 0);
// Construct a hidden/output-layer LayerData for EVALUATION only: weights but
// no gradients and no inverse activation function (no back-propagation).
// NOTE(review): the numbering jumps 124 -> 126, so the extraction appears to
// have dropped the first init-list entry (likely m_size) and the braces.
122 LayerData::LayerData (
size_t _size, const_iterator_type itWeightBegin,
123 std::shared_ptr<std::function<
double(
double)>> _activationFunction,
124 ModeOutputValues eModeOutput)
126 , m_hasDropOut (false)
127 , m_itConstWeightBegin (itWeightBegin)
128 , m_activationFunction (_activationFunction)
129 , m_inverseActivationFunction ()
130 , m_isInputLayer (false)
131 , m_hasWeights (true)
132 , m_hasGradients (false)
133 , m_eModeOutput (eModeOutput)
// only the node values are needed when merely evaluating the net
135 m_values.assign (_size, 0);
// Convert the raw node values of this layer into output "probabilities"
// according to m_eModeOutput: element-wise sigmoid, softmax over the layer,
// or a direct copy of the values.
// NOTE(review): the numbering jumps 147 -> 150/151, so the extraction appears
// to have dropped lines here — in particular the declaration of the softmax
// accumulator `sum` (captured by reference below) and the else/braces.
140 typename LayerData::container_type LayerData::computeProbabilities ()
const
142 container_type probabilitiesContainer;
143 if (TMVA::DNN::isFlagSet (ModeOutputValues::SIGMOID, m_eModeOutput))
// element-wise logistic transform of each node value
145 std::transform (begin (m_values), end (m_values), std::back_inserter (probabilitiesContainer), (*Sigmoid.get ()))
147 else if (TMVA::DNN::isFlagSet (ModeOutputValues::SOFTMAX, m_eModeOutput))
// softmax: exponentiate every value while accumulating the sum ...
150 probabilitiesContainer = m_values;
151 std::for_each (begin (probabilitiesContainer), end (probabilitiesContainer), [&sum](
double& p){ p = std::exp (p); sum += p; });
// ... then normalize so the outputs add up to 1
153 std::for_each (begin (probabilitiesContainer), end (probabilitiesContainer), [sum ](
double& p){ p /= sum; });
// DIRECT mode: pass the node values through unchanged
157 probabilitiesContainer.assign (begin (m_values), end (m_values));
159 return probabilitiesContainer;
// Construct a Layer: store the node count, output mode and activation type,
// then select the activation function and its derivative from the global
// function objects according to _activationFunction.
// NOTE(review): the original-line numbering inside the switch jumps over many
// lines — the extraction appears to have dropped the `actFnc = ...`
// assignments and the `break` statements for most cases. Do not read the
// apparent fall-throughs below as intended behavior; restore from the
// original source before editing this switch.
166 Layer::Layer (
size_t _numNodes, EnumFunction _activationFunction, ModeOutputValues eModeOutputValues)
167 : m_numNodes (_numNodes)
168 , m_eModeOutputValues (eModeOutputValues)
169 , m_activationFunctionType (_activationFunction)
171 for (
size_t iNode = 0; iNode < _numNodes; ++iNode)
// default to the identity activation, then override per the enum
173 auto actFnc = Linear;
174 auto invActFnc = InvLinear;
175 switch (_activationFunction)
177 case EnumFunction::ZERO:
181 case EnumFunction::LINEAR:
183 invActFnc = InvLinear;
185 case EnumFunction::TANH:
189 case EnumFunction::RELU:
193 case EnumFunction::SYMMRELU:
195 invActFnc = InvSymmReLU;
197 case EnumFunction::TANHSHIFT:
199 invActFnc = InvTanhShift;
201 case EnumFunction::SOFTSIGN:
203 invActFnc = InvSoftSign;
205 case EnumFunction::SIGMOID:
207 invActFnc = InvSigmoid;
209 case EnumFunction::GAUSS:
211 invActFnc = InvGauss;
213 case EnumFunction::GAUSSCOMPLEMENT:
214 actFnc = GaussComplement;
215 invActFnc = InvGaussComplement;
// remember the selected function pair for this layer
218 m_activationFunction = actFnc;
219 m_inverseActivationFunction = invActFnc;
// Construct the training Settings: convergence criterion, batch size, test
// frequency, weight decay / regularization, minimizer choice and its
// hyper-parameters (learning rate, momentum, repetitions), and whether to
// use multithreading. m_timer drives the progress display.
// NOTE(review): the numbering skips 238 and 244-247 and 255, so several
// init-list entries (e.g. a min-progress member and drop-out related members)
// appear to have been dropped by the extraction.
232 Settings::Settings (TString name,
233 size_t _convergenceSteps,
size_t _batchSize,
size_t _testRepetitions,
234 double _factorWeightDecay, EnumRegularization eRegularization,
235 MinimizerType _eMinimizerType,
double _learningRate,
236 double _momentum,
int _repetitions,
bool _useMultithreading)
237 : m_timer (100, name)
239 , m_maxProgress (100)
240 , m_convergenceSteps (_convergenceSteps)
241 , m_batchSize (_batchSize)
242 , m_testRepetitions (_testRepetitions)
243 , m_factorWeightDecay (_factorWeightDecay)
248 , m_regularization (eRegularization)
249 , fLearningRate (_learningRate)
250 , fMomentum (_momentum)
251 , fRepetitions (_repetitions)
252 , fMinimizerType (_eMinimizerType)
253 , m_convergenceCount (0)
254 , m_maxConvergenceCount (0)
256 , m_useMultithreading (_useMultithreading)
// Destructor — its body is not visible here (the extraction dropped the
// braces/body lines, if any).
261 Settings::~Settings ()
// Book the monitoring histograms (ROC curve, significance scan, signal and
// background output distributions) at the start of a training cycle.
// NOTE(review): the numbering jumps 281 -> 285, so the usual
// `if (fMonitoring)` guard lines appear to have been dropped by the
// extraction — fMonitoring is dereferenced below, so the guard matters.
281 void ClassificationSettings::startTrainCycle ()
285 create (
"ROC", 100, 0, 1, 100, 0, 1);
286 create (
"Significance", 100, 0, 1, 100, 0, 3);
287 create (
"OutputSig", 100, 0, 1);
288 create (
"OutputBkg", 100, 0, 1);
289 fMonitoring->ProcessEvents ();
// Flush the monitoring GUI at the end of a training cycle; the error value
// parameter is unused here.
296 void ClassificationSettings::endTrainCycle (
double )
298 if (fMonitoring) fMonitoring->ProcessEvents ();
// Record one test-sample result (network output, true target, event weight)
// for the ROC/significance evaluation in endTestCycle; the first (error)
// parameter is unused.
304 void ClassificationSettings::testSample (
double ,
double output,
double target,
double weight)
307 m_output.push_back (output);
308 m_targets.push_back (target);
309 m_weights.push_back (weight);
// Called at the start of a test cycle — its body is not visible here (the
// extraction dropped the braces/body lines; presumably it clears the
// m_output/m_targets/m_weights buffers filled by testSample).
316 void ClassificationSettings::startTestCycle ()
// Evaluate the test samples accumulated via testSample: build the ROC curve,
// scan the cut for the best significance sig/sqrt(sig+bkg), fill the signal
// and background output histograms, and remember the best cut in m_cutValue.
// NOTE(review): the original-line numbering has many gaps throughout this
// function — braces, else-branches and several statements (e.g. the
// declarations feeding `sig`/`bkg` used at original line 445) were dropped
// by the extraction. Treat the control flow below as incomplete.
326 void ClassificationSettings::endTestCycle ()
// nothing to do if no test samples were collected
328 if (m_output.empty ())
330 double minVal = *std::min_element (begin (m_output), end (m_output));
331 double maxVal = *std::max_element (begin (m_output), end (m_output));
332 const size_t numBinsROC = 1000;
333 const size_t numBinsData = 100;
// per-cut counters for the ROC curve (one entry per ROC bin)
335 std::vector<double> truePositives (numBinsROC+1, 0);
336 std::vector<double> falsePositives (numBinsROC+1, 0);
337 std::vector<double> trueNegatives (numBinsROC+1, 0);
338 std::vector<double> falseNegatives (numBinsROC+1, 0);
// binned signal/background output distributions
340 std::vector<double> x (numBinsData, 0);
341 std::vector<double> datSig (numBinsData+1, 0);
342 std::vector<double> datBkg (numBinsData+1, 0);
344 double binSizeROC = (maxVal - minVal)/(
double)numBinsROC;
345 double binSizeData = (maxVal - minVal)/(
double)numBinsData;
347 double sumWeightsSig = 0.0;
348 double sumWeightsBkg = 0.0;
// precompute the bin-center positions of the data histogram
350 for (
size_t b = 0; b < numBinsData; ++b)
352 double binData = minVal + b*binSizeData;
// degenerate case: all outputs (nearly) identical -> no ROC scan possible
356 if (fabs(binSizeROC) < 0.0001)
// classify every test event into its ROC and data bins
359 for (
size_t i = 0, iEnd = m_output.size (); i < iEnd; ++i)
361 double val = m_output.at (i);
362 double truth = m_targets.at (i);
363 double weight = m_weights.at (i);
365 bool isSignal = (truth > 0.5 ?
true :
false);
// rescale the event weight by the stored class weight sums, if set
367 if (m_sumOfSigWeights != 0 && m_sumOfBkgWeights != 0)
370 weight *= m_sumOfSigWeights;
372 weight *= m_sumOfBkgWeights;
375 size_t binROC = (val-minVal)/binSizeROC;
376 size_t binData = (val-minVal)/binSizeData;
// signal events: passing cuts up to binROC are true positives ...
380 for (
size_t n = 0; n <= binROC; ++n)
382 truePositives.at (n) += weight;
// ... failing cuts above binROC are false negatives
384 for (
size_t n = binROC+1; n < numBinsROC; ++n)
386 falseNegatives.at (n) += weight;
389 datSig.at (binData) += weight;
390 sumWeightsSig += weight;
// background events: the mirrored bookkeeping
394 for (
size_t n = 0; n <= binROC; ++n)
396 falsePositives.at (n) += weight;
398 for (
size_t n = binROC+1; n < numBinsROC; ++n)
400 trueNegatives.at (n) += weight;
403 datBkg.at (binData) += weight;
404 sumWeightsBkg += weight;
408 std::vector<double> sigEff;
409 std::vector<double> backRej;
411 double bestSignificance = 0;
412 double bestCutSignificance = 0;
// optionally rescale the counts to a target number of events
414 double numEventsScaleFactor = 1.0;
415 if (m_scaleToNumEvents > 0)
417 size_t numEvents = m_output.size ();
418 numEventsScaleFactor = double (m_scaleToNumEvents)/double (numEvents);
422 clear (
"Significance");
// scan all cut positions: fill the ROC curve and find the best significance
424 for (
size_t i = 0; i < numBinsROC; ++i)
426 double tp = truePositives.at (i) * numEventsScaleFactor;
427 double fp = falsePositives.at (i) * numEventsScaleFactor;
428 double tn = trueNegatives.at (i) * numEventsScaleFactor;
429 double fn = falseNegatives.at (i) * numEventsScaleFactor;
// signal efficiency and background rejection at this cut
431 double seff = (tp+fn == 0.0 ? 1.0 : (tp / (tp+fn)));
432 double brej = (tn+fp == 0.0 ? 0.0 : (tn / (tn+fp)));
434 sigEff.push_back (seff);
435 backRej.push_back (brej);
438 addPoint (
"ROC", seff, brej);
441 double currentCut = (i * binSizeROC)+minVal;
445 double significance = sig / sqrt (sig + bkg);
446 if (significance > bestSignificance)
448 bestSignificance = significance;
449 bestCutSignificance = currentCut;
452 addPoint (
"Significance", currentCut, significance);
456 m_significances.push_back (bestSignificance);
457 static size_t testCycle = 0;
// fill the normalized signal/background output distributions
461 for (
size_t i = 0; i < numBinsData; ++i)
463 addPoint (
"OutputSig", x.at (i), datSig.at (i)/sumWeightsSig);
464 addPoint (
"OutputBkg", x.at (i), datBkg.at (i)/sumWeightsBkg);
// draw all monitoring plots and update the GUI
474 plot (
"ROC",
"", 2, kRed);
475 plot (
"Significance",
"", 3, kRed);
476 plot (
"OutputSig",
"", 4, kRed);
477 plot (
"OutputBkg",
"same", 4, kBlue);
478 fMonitoring->ProcessEvents ();
// remember the cut with the best significance for later use
481 m_cutValue = bestCutSignificance;
// Convergence test: reset the counter whenever the test error improves by
// more than 0.1% over the best seen so far, otherwise count the stagnant
// steps; converged once the counter reaches convergenceSteps() (or the error
// hits zero).
// NOTE(review): the numbering gaps (492->494, 495->499, and after 504)
// indicate the braces and the `return` statements were dropped by the
// extraction.
488 bool Settings::hasConverged (
double testError)
492 if (testError < m_minError*0.999)
494 m_convergenceCount = 0;
495 m_minError = testError;
499 ++m_convergenceCount;
500 m_maxConvergenceCount = std::max (m_convergenceCount, m_maxConvergenceCount);
504 if (m_convergenceCount >= convergenceSteps () || testError <= 0)
515 void ClassificationSettings::setWeightSums (
double sumOfSigWeights,
double sumOfBkgWeights)
517 m_sumOfSigWeights = sumOfSigWeights; m_sumOfBkgWeights = sumOfBkgWeights;
523 void ClassificationSettings::setResultComputation (
524 std::string _fileNameNetConfig,
525 std::string _fileNameResult,
526 std::vector<Pattern>* _resultPatternContainer)
528 m_pResultPatternContainer = _resultPatternContainer;
529 m_fileNameResult = _fileNameResult;
530 m_fileNameNetConfig = _fileNameNetConfig;
// Count the weights of all layers from trainingStartLayer on; each layer
// contributes numWeights(prevNodes) where prevNodes is the node count of the
// preceding layer (the input size for the first layer).
// NOTE(review): the numbering gaps show the extraction dropped lines here —
// the declarations of the accumulator `num` and the layer counter `index`,
// the `++index` increment, the braces, and the final `return num;`.
543 size_t Net::numWeights (
size_t trainingStartLayer)
const
547 size_t prevNodes (inputSize ());
548 for (
auto& layer : m_layers)
550 if (index >= trainingStartLayer)
551 num += layer.numWeights (prevNodes);
552 prevNodes = layer.numNodes ();
// Count the nodes of all layers from trainingStartLayer on.
// NOTE(review): as in numWeights above, the declarations of `num` and
// `index`, the `++index`, the braces and the `return num;` were dropped by
// the extraction (numbering gaps 559->563 and after 566).
559 size_t Net::numNodes (
size_t trainingStartLayer)
const
563 for (
auto& layer : m_layers)
565 if (index >= trainingStartLayer)
566 num += layer.numNodes ();
575 void Net::fillDropContainer (DropContainer& dropContainer,
double dropFraction,
size_t _numNodes)
const
577 size_t numDrops = dropFraction * _numNodes;
578 if (numDrops >= _numNodes)
579 numDrops = _numNodes - 1;
581 dropContainer.insert (end (dropContainer), _numNodes-numDrops,
true);
583 dropContainer.insert (end (dropContainer), numDrops,
false);
585 std::shuffle(end(dropContainer)-_numNodes, end(dropContainer), std::default_random_engine{});