Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
MethodMLP.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Krzysztof Danielowski, Andreas Hoecker, Matt Jachowski, Kamil Kraszewski, Maciej Kruk, Peter Speckmayer, Joerg Stelzer, Eckhard v. Toerne, Jan Therhaag, Jiahang Zhong
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodMLP *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * ANN Multilayer Perceptron class for the discrimination of signal *
12  * from background. BFGS implementation based on TMultiLayerPerceptron *
13  * class from ROOT (http://root.cern.ch). *
14  * *
15  * Authors (alphabetical): *
16  * Krzysztof Danielowski <danielow@cern.ch> - IFJ & AGH, Poland *
17  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
18  * Matt Jachowski <jachowski@stanford.edu> - Stanford University, USA *
19  * Kamil Kraszewski <kalq@cern.ch> - IFJ & UJ, Poland *
20  * Maciej Kruk <mkruk@cern.ch> - IFJ & AGH, Poland *
21  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
22  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
23  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
24  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
25  * Jiahang Zhong <Jiahang.Zhong@cern.ch> - Academia Sinica, Taipei *
26  * *
27  * Copyright (c) 2005-2011: *
28  * CERN, Switzerland *
29  * U. of Victoria, Canada *
30  * MPI-K Heidelberg, Germany *
31  * U. of Bonn, Germany *
32  * *
33  * Redistribution and use in source and binary forms, with or without *
34  * modification, are permitted according to the terms listed in LICENSE *
35  * (http://tmva.sourceforge.net/LICENSE) *
36  **********************************************************************************/
37 
38 /*! \class TMVA::MethodMLP
39 \ingroup TMVA
40 
41 Multilayer Perceptron class built off of MethodANNBase
42 
43 */
44 
45 #include "TMVA/MethodMLP.h"
46 
47 #include "TMVA/Config.h"
48 #include "TMVA/Configurable.h"
49 #include "TMVA/ConvergenceTest.h"
50 #include "TMVA/ClassifierFactory.h"
51 #include "TMVA/DataSet.h"
52 #include "TMVA/DataSetInfo.h"
53 #include "TMVA/FitterBase.h"
54 #include "TMVA/GeneticFitter.h"
55 #include "TMVA/IFitterTarget.h"
56 #include "TMVA/IMethod.h"
57 #include "TMVA/Interval.h"
58 #include "TMVA/MethodANNBase.h"
59 #include "TMVA/MsgLogger.h"
60 #include "TMVA/TNeuron.h"
61 #include "TMVA/TSynapse.h"
62 #include "TMVA/Timer.h"
63 #include "TMVA/Tools.h"
64 #include "TMVA/Types.h"
65 
66 #include "TH1.h"
67 #include "TString.h"
68 #include "TTree.h"
69 #include "Riostream.h"
70 #include "TFitter.h"
71 #include "TMatrixD.h"
72 #include "TMath.h"
73 #include "TFile.h"
74 
75 #include <cmath>
76 #include <vector>
77 
78 #ifdef MethodMLP_UseMinuit__
79 TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
80 Bool_t MethodMLP_UseMinuit = kTRUE;
81 #endif
82 
83 REGISTER_METHOD(MLP)
84 
85 ClassImp(TMVA::MethodMLP);
86 
87  using std::vector;
88 
89 ////////////////////////////////////////////////////////////////////////////////
90 /// standard constructor
91 
92 TMVA::MethodMLP::MethodMLP( const TString& jobName,
93  const TString& methodTitle,
94  DataSetInfo& theData,
95  const TString& theOption)
96  : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption),
97  fUseRegulator(false), fCalculateErrors(false),
98  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
99  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
100  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
101  fSamplingTraining(false), fSamplingTesting(false),
102  fLastAlpha(0.0), fTau(0.),
103  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
104  fBPMode(kSequential), fBpModeS("None"),
105  fBatchSize(0), fTestRate(0), fEpochMon(false),
106  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
107  fGA_SC_rate(0), fGA_SC_factor(0.0),
108  fDeviationsFromTargets(0),
109  fWeightRange (1.0)
110 {
111 
112 }
113 
114 ////////////////////////////////////////////////////////////////////////////////
115 /// constructor from a weight file
116 
117 TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
118  const TString& theWeightFile)
119  : MethodANNBase( Types::kMLP, theData, theWeightFile),
120  fUseRegulator(false), fCalculateErrors(false),
121  fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
122  fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
123  fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
124  fSamplingTraining(false), fSamplingTesting(false),
125  fLastAlpha(0.0), fTau(0.),
126  fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
127  fBPMode(kSequential), fBpModeS("None"),
128  fBatchSize(0), fTestRate(0), fEpochMon(false),
129  fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
130  fGA_SC_rate(0), fGA_SC_factor(0.0),
131  fDeviationsFromTargets(0),
132  fWeightRange (1.0)
133 {
134 }
135 
136 ////////////////////////////////////////////////////////////////////////////////
137 /// destructor
138 /// nothing to be done
139 
140 TMVA::MethodMLP::~MethodMLP()
141 {
142 }
143 
144 void TMVA::MethodMLP::Train()
145 {
146  Train(NumCycles());
147 }
148 
149 
150 
151 ////////////////////////////////////////////////////////////////////////////////
152 /// MLP can handle classification with 2 classes and regression with one regression-target
153 
154 Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
155 {
156  if (type == Types::kClassification && numberClasses == 2 ) return kTRUE;
157  if (type == Types::kMulticlass ) return kTRUE;
158  if (type == Types::kRegression ) return kTRUE;
159 
160  return kFALSE;
161 }
162 
163 ////////////////////////////////////////////////////////////////////////////////
164 /// default initializations
165 
166 void TMVA::MethodMLP::Init()
167 {
168  // the minimum requirement to declare an event signal-like
169  SetSignalReferenceCut( 0.5 );
170 #ifdef MethodMLP_UseMinuit__
171  fgThis = this;
172 #endif
173 }
174 
175 ////////////////////////////////////////////////////////////////////////////////
176 /// define the options (their key words) that can be set in the option string
177 ///
178 /// know options:
179 ///
180 /// - TrainingMethod <string> Training method
181 /// available values are:
182 /// - BP Back-Propagation <default>
183 /// - GA Genetic Algorithm (takes a LONG time)
184 ///
185 /// - LearningRate <float> NN learning rate parameter
186 /// - DecayRate <float> Decay rate for learning parameter
187 /// - TestRate <int> Test for overtraining performed at each #th epochs
188 ///
189 /// - BPMode <string> Back-propagation learning mode
190 /// available values are:
191 /// - sequential <default>
192 /// - batch
193 ///
194 /// - BatchSize <int> Batch size: number of events/batch, only set if in Batch Mode,
195 /// - -1 for BatchSize=number_of_events
196 
197 void TMVA::MethodMLP::DeclareOptions()
198 {
199  DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
200  "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
201  AddPreDefVal(TString("BP"));
202  AddPreDefVal(TString("GA"));
203  AddPreDefVal(TString("BFGS"));
204 
205  DeclareOptionRef(fLearnRate=0.02, "LearningRate", "ANN learning rate parameter");
206  DeclareOptionRef(fDecayRate=0.01, "DecayRate", "Decay rate for learning parameter");
207  DeclareOptionRef(fTestRate =10, "TestRate", "Test for overtraining performed at each #th epochs");
208  DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );
209 
210  DeclareOptionRef(fSamplingFraction=1.0, "Sampling","Only 'Sampling' (randomly selected) events are trained each epoch");
211  DeclareOptionRef(fSamplingEpoch=1.0, "SamplingEpoch","Sampling is used for the first 'SamplingEpoch' epochs, afterwards, all events are taken for training");
212  DeclareOptionRef(fSamplingWeight=1.0, "SamplingImportance"," The sampling weights of events in epochs which successful (worse estimator than before) are multiplied with SamplingImportance, else they are divided.");
213 
214  DeclareOptionRef(fSamplingTraining=kTRUE, "SamplingTraining","The training sample is sampled");
215  DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" ,"The testing sample is sampled");
216 
217  DeclareOptionRef(fResetStep=50, "ResetStep", "How often BFGS should reset history");
218  DeclareOptionRef(fTau =3.0, "Tau", "LineSearch \"size step\"");
219 
220  DeclareOptionRef(fBpModeS="sequential", "BPMode",
221  "Back-propagation learning mode: sequential or batch");
222  AddPreDefVal(TString("sequential"));
223  AddPreDefVal(TString("batch"));
224 
225  DeclareOptionRef(fBatchSize=-1, "BatchSize",
226  "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");
227 
228  DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
229  "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");
230 
231  DeclareOptionRef(fSteps=-1, "ConvergenceTests",
232  "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");
233 
234  DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
235  "Use regulator to avoid over-training"); //zjh
236  DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
237  "Maximum times of regulator update"); //zjh
238  DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
239  "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value"); //zjh
240 
241  DeclareOptionRef(fWeightRange=1.0, "WeightRange",
242  "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
243 
244 }
245 
246 ////////////////////////////////////////////////////////////////////////////////
247 /// process user options
248 
249 void TMVA::MethodMLP::ProcessOptions()
250 {
251  MethodANNBase::ProcessOptions();
252 
253 
254  if (IgnoreEventsWithNegWeightsInTraining()) {
255  Log() << kINFO
256  << "Will ignore negative events in training!"
257  << Endl;
258  }
259 
260 
261  if (fTrainMethodS == "BP" ) fTrainingMethod = kBP;
262  else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
263  else if (fTrainMethodS == "GA" ) fTrainingMethod = kGA;
264 
265  if (fBpModeS == "sequential") fBPMode = kSequential;
266  else if (fBpModeS == "batch") fBPMode = kBatch;
267 
268  // InitializeLearningRates();
269 
270  if (fBPMode == kBatch) {
271  Data()->SetCurrentType(Types::kTraining);
272  Int_t numEvents = Data()->GetNEvents();
273  if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
274  }
275 }
276 
277 ////////////////////////////////////////////////////////////////////////////////
278 /// initialize learning rates of synapses, used only by back propagation
279 
280 void TMVA::MethodMLP::InitializeLearningRates()
281 {
282  Log() << kDEBUG << "Initialize learning rates" << Endl;
283  TSynapse *synapse;
284  Int_t numSynapses = fSynapses->GetEntriesFast();
285  for (Int_t i = 0; i < numSynapses; i++) {
286  synapse = (TSynapse*)fSynapses->At(i);
287  synapse->SetLearningRate(fLearnRate);
288  }
289 }
290 
291 ////////////////////////////////////////////////////////////////////////////////
292 /// calculate the estimator that training is attempting to minimize
293 
294 Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
295 {
296  // sanity check
297  if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
298  Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
299  }
300 
301  Types::ETreeType saveType = Data()->GetCurrentType();
302  Data()->SetCurrentType(treeType);
303 
304  // if epochs are counted create monitoring histograms (only available for classification)
305  TString type = (treeType == Types::kTraining ? "train" : "test");
306  TString name = Form("convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch);
307  TString nameB = name + "_B";
308  TString nameS = name + "_S";
309  Int_t nbin = 100;
310  Float_t limit = 2;
311  TH1* histS = 0;
312  TH1* histB = 0;
313  if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
314  histS = new TH1F( nameS, nameS, nbin, -limit, limit );
315  histB = new TH1F( nameB, nameB, nbin, -limit, limit );
316  }
317 
318  Double_t estimator = 0;
319 
320  // loop over all training events
321  Int_t nEvents = GetNEvents();
322  UInt_t nClasses = DataInfo().GetNClasses();
323  UInt_t nTgts = DataInfo().GetNTargets();
324 
325 
326  Float_t sumOfWeights = 0.f;
327  if( fWeightRange < 1.f ){
328  fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
329  }
330 
331  for (Int_t i = 0; i < nEvents; i++) {
332 
333  const Event* ev = GetEvent(i);
334 
335  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
336  && (saveType == Types::kTraining)){
337  continue;
338  }
339 
340  Double_t w = ev->GetWeight();
341 
342  ForceNetworkInputs( ev );
343  ForceNetworkCalculations();
344 
345  Double_t d = 0, v = 0;
346  if (DoRegression()) {
347  for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
348  v = GetOutputNeuron( itgt )->GetActivationValue();
349  Double_t targetValue = ev->GetTarget( itgt );
350  Double_t dt = v - targetValue;
351  d += (dt*dt);
352  }
353  estimator += d*w;
354  } else if (DoMulticlass() ) {
355  UInt_t cls = ev->GetClass();
356  if (fEstimator==kCE){
357  Double_t norm(0);
358  for (UInt_t icls = 0; icls < nClasses; icls++) {
359  Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
360  norm += exp( activationValue );
361  if(icls==cls)
362  d = exp( activationValue );
363  }
364  d = -TMath::Log(d/norm);
365  }
366  else{
367  for (UInt_t icls = 0; icls < nClasses; icls++) {
368  Double_t desired = (icls==cls) ? 1.0 : 0.0;
369  v = GetOutputNeuron( icls )->GetActivationValue();
370  d = (desired-v)*(desired-v);
371  }
372  }
373  estimator += d*w; //zjh
374  } else {
375  Double_t desired = DataInfo().IsSignal(ev)?1.:0.;
376  v = GetOutputNeuron()->GetActivationValue();
377  if (fEstimator==kMSE) d = (desired-v)*(desired-v); //zjh
378  else if (fEstimator==kCE) d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v)); //zjh
379  estimator += d*w; //zjh
380  }
381 
382  if( fDeviationsFromTargets )
383  fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
384 
385  sumOfWeights += w;
386 
387 
388  // fill monitoring histograms
389  if (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
390  else if (histB != 0) histB->Fill( float(v), float(w) );
391  }
392 
393 
394  if( fDeviationsFromTargets ) {
395  std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());
396 
397  Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;
398  estimator = 0.f;
399 
400  Float_t weightRangeCut = fWeightRange*sumOfWeights;
401  Float_t weightSum = 0.f;
402  for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
403  float deviation = (*itDev).first;
404  float devWeight = (*itDev).second;
405  weightSum += devWeight; // add the weight of this event
406  if( weightSum <= weightRangeCut ) { // if within the region defined by fWeightRange
407  estimator += devWeight*deviation;
408  }
409  }
410 
411  sumOfWeights = sumOfWeightsInRange;
412  delete fDeviationsFromTargets;
413  }
414 
415  if (histS != 0) fEpochMonHistS.push_back( histS );
416  if (histB != 0) fEpochMonHistB.push_back( histB );
417 
418  //if (DoRegression()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
419  //else if (DoMulticlass()) estimator = TMath::Sqrt(estimator/Float_t(nEvents));
420  //else estimator = estimator*0.5/Float_t(nEvents);
421  estimator = estimator/Float_t(sumOfWeights);
422 
423 
424  //if (fUseRegulator) estimator+=fPrior/Float_t(nEvents); //zjh
425 
426  Data()->SetCurrentType( saveType );
427 
428  // provide epoch-wise monitoring
429  if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
430  CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
431  }
432 
433  return estimator;
434 }
435 
436 ////////////////////////////////////////////////////////////////////////////////
437 
438 void TMVA::MethodMLP::Train(Int_t nEpochs)
439 {
440  if (fNetwork == 0) {
441  //Log() << kERROR <<"ANN Network is not initialized, doing it now!"<< Endl;
442  Log() << kFATAL <<"ANN Network is not initialized, doing it now!"<< Endl;
443  SetAnalysisType(GetAnalysisType());
444  }
445  Log() << kDEBUG << "reinitialize learning rates" << Endl;
446  InitializeLearningRates();
447  Log() << kHEADER;
448  PrintMessage("Training Network");
449  Log() << Endl;
450  Int_t nEvents=GetNEvents();
451  Int_t nSynapses=fSynapses->GetEntriesFast();
452  if (nSynapses>nEvents)
453  Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;
454 
455  fIPyMaxIter = nEpochs;
456  if (fInteractive && fInteractive->NotInitialized()){
457  std::vector<TString> titles = {"Error on training set", "Error on test set"};
458  fInteractive->Init(titles);
459  }
460 
461 #ifdef MethodMLP_UseMinuit__
462  if (useMinuit) MinuitMinimize();
463 #else
464  if (fTrainingMethod == kGA) GeneticMinimize();
465  else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
466  else BackPropagationMinimize(nEpochs);
467 #endif
468 
469  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
470  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
471  if (fUseRegulator){
472  Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
473  UpdateRegulators();
474  Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
475  }
476 
477  if( fCalculateErrors || fUseRegulator )
478  {
479  Int_t numSynapses=fSynapses->GetEntriesFast();
480  fInvHessian.ResizeTo(numSynapses,numSynapses);
481  GetApproxInvHessian( fInvHessian ,false);
482  }
483  ExitFromTraining();
484 }
485 
486 ////////////////////////////////////////////////////////////////////////////////
487 /// train network with BFGS algorithm
488 
489 void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
490 {
491  Timer timer( (fSteps>0?100:nEpochs), GetName() );
492 
493  // create histograms for overtraining monitoring
494  Int_t nbinTest = Int_t(nEpochs/fTestRate);
495  if(!IsSilentFile())
496  {
497  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
498  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
499  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
500  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
501  }
502 
503  Int_t nSynapses = fSynapses->GetEntriesFast();
504  Int_t nWeights = nSynapses;
505 
506  for (Int_t i=0;i<nSynapses;i++) {
507  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
508  synapse->SetDEDw(0.0);
509  }
510 
511  std::vector<Double_t> buffer( nWeights );
512  for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;
513 
514  TMatrixD Dir ( nWeights, 1 );
515  TMatrixD Hessian ( nWeights, nWeights );
516  TMatrixD Gamma ( nWeights, 1 );
517  TMatrixD Delta ( nWeights, 1 );
518  Int_t RegUpdateCD=0; //zjh
519  Int_t RegUpdateTimes=0; //zjh
520  Double_t AccuError=0;
521 
522  Double_t trainE = -1;
523  Double_t testE = -1;
524 
525  fLastAlpha = 0.;
526 
527  if(fSamplingTraining || fSamplingTesting)
528  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
529 
530  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
531  timer.DrawProgressBar( 0 );
532 
533  // start training cycles (epochs)
534  for (Int_t i = 0; i < nEpochs; i++) {
535 
536  if (fExitFromTraining) break;
537  fIPyCurrentIter = i;
538  if (Float_t(i)/nEpochs < fSamplingEpoch) {
539  if ((i+1)%fTestRate == 0 || (i == 0)) {
540  if (fSamplingTraining) {
541  Data()->SetCurrentType( Types::kTraining );
542  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
543  Data()->CreateSampling();
544  }
545  if (fSamplingTesting) {
546  Data()->SetCurrentType( Types::kTesting );
547  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
548  Data()->CreateSampling();
549  }
550  }
551  }
552  else {
553  Data()->SetCurrentType( Types::kTraining );
554  Data()->InitSampling(1.0,1.0);
555  Data()->SetCurrentType( Types::kTesting );
556  Data()->InitSampling(1.0,1.0);
557  }
558  Data()->SetCurrentType( Types::kTraining );
559 
560  //zjh
561  if (fUseRegulator) {
562  UpdatePriors();
563  RegUpdateCD++;
564  }
565  //zjh
566 
567  SetGammaDelta( Gamma, Delta, buffer );
568 
569  if (i % fResetStep == 0 && i<0.5*nEpochs) { //zjh
570  SteepestDir( Dir );
571  Hessian.UnitMatrix();
572  RegUpdateCD=0; //zjh
573  }
574  else {
575  if (GetHessian( Hessian, Gamma, Delta )) {
576  SteepestDir( Dir );
577  Hessian.UnitMatrix();
578  RegUpdateCD=0; //zjh
579  }
580  else SetDir( Hessian, Dir );
581  }
582 
583  Double_t dError=0; //zjh
584  if (DerivDir( Dir ) > 0) {
585  SteepestDir( Dir );
586  Hessian.UnitMatrix();
587  RegUpdateCD=0; //zjh
588  }
589  if (LineSearch( Dir, buffer, &dError )) { //zjh
590  Hessian.UnitMatrix();
591  SteepestDir( Dir );
592  RegUpdateCD=0; //zjh
593  if (LineSearch(Dir, buffer, &dError)) { //zjh
594  i = nEpochs;
595  Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
596  }
597  }
598 
599  //zjh+
600  if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
601  AccuError+=dError;
602 
603  if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
604  Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
605  UpdateRegulators();
606  Hessian.UnitMatrix();
607  RegUpdateCD=0;
608  RegUpdateTimes++;
609  AccuError=0;
610  }
611  //zjh-
612 
613  // monitor convergence of training and control sample
614  if ((i+1)%fTestRate == 0) {
615  //trainE = CalculateEstimator( Types::kTraining, i ) - fPrior/Float_t(GetNEvents()); // estimator for training sample //zjh
616  //testE = CalculateEstimator( Types::kTesting, i ) - fPrior/Float_t(GetNEvents()); // estimator for test sample //zjh
617  trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample //zjh
618  testE = CalculateEstimator( Types::kTesting, i ) ; // estimator for test sample //zjh
619  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
620  if(!IsSilentFile()) //saved to see in TMVAGui, no needed without file
621  {
622  fEstimatorHistTrain->Fill( i+1, trainE );
623  fEstimatorHistTest ->Fill( i+1, testE );
624  }
625  Bool_t success = kFALSE;
626  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
627  success = kTRUE;
628  }
629  Data()->EventResult( success );
630 
631  SetCurrentValue( testE );
632  if (HasConverged()) {
633  if (Float_t(i)/nEpochs < fSamplingEpoch) {
634  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
635  i = newEpoch;
636  ResetConvergenceCounter();
637  }
638  else break;
639  }
640  }
641 
642  // draw progress
643  TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE,i ); //zjh
644  if (fSteps > 0) {
645  Float_t progress = 0;
646  if (Float_t(i)/nEpochs < fSamplingEpoch)
647  // progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
648  progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
649  else
650  {
651  // progress = 100.0*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
652  progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
653  }
654  Float_t progress2= 100.0*RegUpdateTimes/fUpdateLimit; //zjh
655  if (progress2>progress) progress=progress2; //zjh
656  timer.DrawProgressBar( Int_t(progress), convText );
657  }
658  else {
659  Int_t progress=Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit)); //zjh
660  if (progress<i) progress=i; //zjh
661  timer.DrawProgressBar( progress, convText ); //zjh
662  }
663 
664  // some verbose output
665  if (fgPRINT_SEQ) {
666  PrintNetwork();
667  WaitForKeyboard();
668  }
669  }
670 }
671 
672 ////////////////////////////////////////////////////////////////////////////////
673 
674 void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
675 {
676  Int_t nWeights = fSynapses->GetEntriesFast();
677 
678  Int_t IDX = 0;
679  Int_t nSynapses = fSynapses->GetEntriesFast();
680  for (Int_t i=0;i<nSynapses;i++) {
681  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
682  Gamma[IDX++][0] = -synapse->GetDEDw();
683  }
684 
685  for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];
686 
687  ComputeDEDw();
688 
689  IDX = 0;
690  for (Int_t i=0;i<nSynapses;i++)
691  {
692  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
693  Gamma[IDX++][0] += synapse->GetDEDw();
694  }
695 }
696 
697 ////////////////////////////////////////////////////////////////////////////////
698 
699 void TMVA::MethodMLP::ComputeDEDw()
700 {
701  Int_t nSynapses = fSynapses->GetEntriesFast();
702  for (Int_t i=0;i<nSynapses;i++) {
703  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
704  synapse->SetDEDw( 0.0 );
705  }
706 
707  Int_t nEvents = GetNEvents();
708  Int_t nPosEvents = nEvents;
709  for (Int_t i=0;i<nEvents;i++) {
710 
711  const Event* ev = GetEvent(i);
712  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
713  && (Data()->GetCurrentType() == Types::kTraining)){
714  --nPosEvents;
715  continue;
716  }
717 
718  SimulateEvent( ev );
719 
720  for (Int_t j=0;j<nSynapses;j++) {
721  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
722  synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
723  }
724  }
725 
726  for (Int_t i=0;i<nSynapses;i++) {
727  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
728  Double_t DEDw=synapse->GetDEDw(); //zjh
729  if (fUseRegulator) DEDw+=fPriorDev[i]; //zjh
730  synapse->SetDEDw( DEDw / nPosEvents ); //zjh
731  }
732 }
733 
734 ////////////////////////////////////////////////////////////////////////////////
735 
736 void TMVA::MethodMLP::SimulateEvent( const Event* ev )
737 {
738  Double_t eventWeight = ev->GetWeight();
739 
740  ForceNetworkInputs( ev );
741  ForceNetworkCalculations();
742 
743  if (DoRegression()) {
744  UInt_t ntgt = DataInfo().GetNTargets();
745  for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
746  Double_t desired = ev->GetTarget(itgt);
747  Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
748  GetOutputNeuron( itgt )->SetError(error);
749  }
750  } else if (DoMulticlass()) {
751  UInt_t nClasses = DataInfo().GetNClasses();
752  UInt_t cls = ev->GetClass();
753  for (UInt_t icls = 0; icls < nClasses; icls++) {
754  Double_t desired = ( cls==icls ? 1.0 : 0.0 );
755  Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
756  GetOutputNeuron( icls )->SetError(error);
757  }
758  } else {
759  Double_t desired = GetDesiredOutput( ev );
760  Double_t error=-1; //zjh
761  if (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight; //zjh
762  else if (fEstimator==kCE) error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
763  GetOutputNeuron()->SetError(error);
764  }
765 
766  CalculateNeuronDeltas();
767  for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
768  TSynapse *synapse = (TSynapse*)fSynapses->At(j);
769  synapse->InitDelta();
770  synapse->CalculateDelta();
771  }
772 }
773 
774 ////////////////////////////////////////////////////////////////////////////////
775 
776 void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
777 {
778  Int_t IDX = 0;
779  Int_t nSynapses = fSynapses->GetEntriesFast();
780 
781  for (Int_t i=0;i<nSynapses;i++) {
782  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
783  Dir[IDX++][0] = -synapse->GetDEDw();
784  }
785 }
786 
787 ////////////////////////////////////////////////////////////////////////////////
788 
789 Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
790 {
791  TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
792  if ((Double_t) gd[0][0] == 0.) return kTRUE;
793  TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
794  TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
795  TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
796  Double_t a = 1 / (Double_t) gd[0][0];
797  Double_t f = 1 + ((Double_t)gHg[0][0]*a);
798  TMatrixD res(TMatrixD(Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed,Delta)));
799  res *= f;
800  res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
801  TMatrixD(TMatrixD::kTransposed,Delta)));
802  res *= a;
803  Hessian += res;
804 
805  return kFALSE;
806 }
807 
808 ////////////////////////////////////////////////////////////////////////////////
809 
810 void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
811 {
812  Int_t IDX = 0;
813  Int_t nSynapses = fSynapses->GetEntriesFast();
814  TMatrixD DEDw(nSynapses, 1);
815 
816  for (Int_t i=0;i<nSynapses;i++) {
817  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
818  DEDw[IDX++][0] = synapse->GetDEDw();
819  }
820 
821  dir = Hessian * DEDw;
822  for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
823 }
824 
825 ////////////////////////////////////////////////////////////////////////////////
826 
827 Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
828 {
829  Int_t IDX = 0;
830  Int_t nSynapses = fSynapses->GetEntriesFast();
831  Double_t Result = 0.0;
832 
833  for (Int_t i=0;i<nSynapses;i++) {
834  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
835  Result += Dir[IDX++][0] * synapse->GetDEDw();
836  }
837  return Result;
838 }
839 
840 ////////////////////////////////////////////////////////////////////////////////
841 
842 Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
843 {
844  Int_t IDX = 0;
845  Int_t nSynapses = fSynapses->GetEntriesFast();
846  Int_t nWeights = nSynapses;
847 
848  std::vector<Double_t> Origin(nWeights);
849  for (Int_t i=0;i<nSynapses;i++) {
850  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
851  Origin[i] = synapse->GetWeight();
852  }
853 
854  Double_t err1 = GetError();
855  Double_t errOrigin=err1;//zjh
856  Double_t alpha1 = 0.;
857  Double_t alpha2 = fLastAlpha;
858 
859 
860  if (alpha2 < 0.01) alpha2 = 0.01;
861  else if (alpha2 > 2.0) alpha2 = 2.0;
862  Double_t alpha_original = alpha2;
863  Double_t alpha3 = alpha2;
864 
865  SetDirWeights( Origin, Dir, alpha2 );
866  Double_t err2 = GetError();
867  //Double_t err2 = err1;
868  Double_t err3 = err2;
869  Bool_t bingo = kFALSE;
870 
871 
872  if (err1 > err2) {
873  for (Int_t i=0;i<100;i++) {
874  alpha3 *= fTau;
875  SetDirWeights(Origin, Dir, alpha3);
876  err3 = GetError();
877  if (err3 > err2) {
878  bingo = kTRUE;
879  break;
880  }
881  alpha1 = alpha2;
882  err1 = err2;
883  alpha2 = alpha3;
884  err2 = err3;
885  }
886  if (!bingo) {
887  SetDirWeights(Origin, Dir, 0.);
888  return kTRUE;
889  }
890  }
891  else {
892  for (Int_t i=0;i<100;i++) {
893  alpha2 /= fTau;
894  if (i==50) {
895  Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
896  alpha2 = -alpha_original;
897  }
898  SetDirWeights(Origin, Dir, alpha2);
899  err2 = GetError();
900  if (err1 > err2) {
901  bingo = kTRUE;
902  break;
903  }
904  alpha3 = alpha2;
905  err3 = err2;
906  }
907  if (!bingo) {
908  SetDirWeights(Origin, Dir, 0.);
909  Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
910  fLastAlpha = 0.05;
911  return kTRUE;
912  }
913  }
914 
915  if (alpha1>0 && alpha2>0 && alpha3 > 0) {
916  fLastAlpha = 0.5 * (alpha1 + alpha3 -
917  (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
918  - ( err2 - err1 ) / (alpha2 - alpha1 )));
919  }
920  else {
921  fLastAlpha = alpha2;
922  }
923 
924  fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;
925 
926  SetDirWeights(Origin, Dir, fLastAlpha);
927 
928  // leaving these lines uncommented is a heavy price to pay for only a warning message
929  // (which shouldn't appear anyway)
930  // --> about 15% of time is spent in the final GetError().
931  //
932  Double_t finalError = GetError();
933  if (finalError > err1) {
934  Log() << kWARNING << "Line search increased error! Something is wrong."
935  << "fLastAlpha=" << fLastAlpha << "al123=" << alpha1 << " "
936  << alpha2 << " " << alpha3 << " err1="<< err1 << " errfinal=" << finalError << Endl;
937  }
938 
939  for (Int_t i=0;i<nSynapses;i++) {
940  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
941  buffer[IDX] = synapse->GetWeight() - Origin[IDX];
942  IDX++;
943  }
944 
945  if (dError) (*dError)=(errOrigin-finalError)/finalError; //zjh
946 
947  return kFALSE;
948 }
949 
950 ////////////////////////////////////////////////////////////////////////////////
951 
952 void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
953 {
954  Int_t IDX = 0;
955  Int_t nSynapses = fSynapses->GetEntriesFast();
956 
957  for (Int_t i=0;i<nSynapses;i++) {
958  TSynapse *synapse = (TSynapse*)fSynapses->At(i);
959  synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
960  IDX++;
961  }
962  if (fUseRegulator) UpdatePriors();//zjh
963 }
964 
965 
966 ////////////////////////////////////////////////////////////////////////////////
967 
968 Double_t TMVA::MethodMLP::GetError()
969 {
970  Int_t nEvents = GetNEvents();
971  UInt_t ntgts = GetNTargets();
972  Double_t Result = 0.;
973 
974  for (Int_t i=0;i<nEvents;i++) {
975  const Event* ev = GetEvent(i);
976 
977  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
978  && (Data()->GetCurrentType() == Types::kTraining)){
979  continue;
980  }
981  SimulateEvent( ev );
982 
983  Double_t error = 0.;
984  if (DoRegression()) {
985  for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
986  error += GetMSEErr( ev, itgt );//zjh
987  }
988  } else if ( DoMulticlass() ){
989  for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
990  error += GetMSEErr( ev, icls );
991  }
992  } else {
993  if (fEstimator==kMSE) error = GetMSEErr( ev ); //zjh
994  else if (fEstimator==kCE) error= GetCEErr( ev ); //zjh
995  }
996  Result += error * ev->GetWeight();
997  }
998  if (fUseRegulator) Result+=fPrior; //zjh
999  if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
1000  return Result;
1001 }
1002 
1003 ////////////////////////////////////////////////////////////////////////////////
1004 
1005 Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
1006 {
1007  Double_t error = 0;
1008  Double_t output = GetOutputNeuron( index )->GetActivationValue();
1009  Double_t target = 0;
1010  if (DoRegression()) target = ev->GetTarget( index );
1011  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1012  else target = GetDesiredOutput( ev );
1013 
1014  error = 0.5*(output-target)*(output-target); //zjh
1015 
1016  return error;
1017 
1018 }
1019 
1020 ////////////////////////////////////////////////////////////////////////////////
1021 
1022 Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index ) //zjh
1023 {
1024  Double_t error = 0;
1025  Double_t output = GetOutputNeuron( index )->GetActivationValue();
1026  Double_t target = 0;
1027  if (DoRegression()) target = ev->GetTarget( index );
1028  else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
1029  else target = GetDesiredOutput( ev );
1030 
1031  error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));
1032 
1033  return error;
1034 }
1035 
1036 ////////////////////////////////////////////////////////////////////////////////
1037 /// minimize estimator / train network with back propagation algorithm
1038 
1039 void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
1040 {
1041  // Timer timer( nEpochs, GetName() );
1042  Timer timer( (fSteps>0?100:nEpochs), GetName() );
1043  Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;
1044 
1045  // create histograms for overtraining monitoring
1046  Int_t nbinTest = Int_t(nEpochs/fTestRate);
1047  if(!IsSilentFile())
1048  {
1049  fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
1050  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1051  fEstimatorHistTest = new TH1F( "estimatorHistTest", "test estimator",
1052  nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
1053  }
1054  if(fSamplingTraining || fSamplingTesting)
1055  Data()->InitSampling(1.0,1.0,fRandomSeed); // initialize sampling to initialize the random generator with the given seed
1056 
1057  if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
1058  timer.DrawProgressBar(0);
1059 
1060  // estimators
1061  Double_t trainE = -1;
1062  Double_t testE = -1;
1063 
1064  // start training cycles (epochs)
1065  for (Int_t i = 0; i < nEpochs; i++) {
1066 
1067  if (fExitFromTraining) break;
1068  fIPyCurrentIter = i;
1069  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1070  if ((i+1)%fTestRate == 0 || (i == 0)) {
1071  if (fSamplingTraining) {
1072  Data()->SetCurrentType( Types::kTraining );
1073  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1074  Data()->CreateSampling();
1075  }
1076  if (fSamplingTesting) {
1077  Data()->SetCurrentType( Types::kTesting );
1078  Data()->InitSampling(fSamplingFraction,fSamplingWeight);
1079  Data()->CreateSampling();
1080  }
1081  }
1082  }
1083  else {
1084  Data()->SetCurrentType( Types::kTraining );
1085  Data()->InitSampling(1.0,1.0);
1086  Data()->SetCurrentType( Types::kTesting );
1087  Data()->InitSampling(1.0,1.0);
1088  }
1089  Data()->SetCurrentType( Types::kTraining );
1090 
1091  TrainOneEpoch();
1092  DecaySynapseWeights(i >= lateEpoch);
1093 
1094  // monitor convergence of training and control sample
1095  if ((i+1)%fTestRate == 0) {
1096  trainE = CalculateEstimator( Types::kTraining, i ); // estimator for training sample
1097  testE = CalculateEstimator( Types::kTesting, i ); // estimator for test sample
1098  if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
1099  if(!IsSilentFile())
1100  {
1101  fEstimatorHistTrain->Fill( i+1, trainE );
1102  fEstimatorHistTest ->Fill( i+1, testE );
1103  }
1104  Bool_t success = kFALSE;
1105  if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
1106  success = kTRUE;
1107  }
1108  Data()->EventResult( success );
1109 
1110  SetCurrentValue( testE );
1111  if (HasConverged()) {
1112  if (Float_t(i)/nEpochs < fSamplingEpoch) {
1113  Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
1114  i = newEpoch;
1115  ResetConvergenceCounter();
1116  }
1117  else {
1118  if (lateEpoch > i) lateEpoch = i;
1119  else break;
1120  }
1121  }
1122  }
1123 
1124  // draw progress bar (add convergence value)
1125  TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
1126  if (fSteps > 0) {
1127  Float_t progress = 0;
1128  if (Float_t(i)/nEpochs < fSamplingEpoch)
1129  progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
1130  else
1131  progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());
1132 
1133  timer.DrawProgressBar( Int_t(progress), convText );
1134  }
1135  else {
1136  timer.DrawProgressBar( i, convText );
1137  }
1138  }
1139 }
1140 
1141 ////////////////////////////////////////////////////////////////////////////////
1142 /// train network over a single epoch/cycle of events
1143 
1144 void TMVA::MethodMLP::TrainOneEpoch()
1145 {
1146  Int_t nEvents = Data()->GetNEvents();
1147 
1148  // randomize the order events will be presented, important for sequential mode
1149  Int_t* index = new Int_t[nEvents];
1150  for (Int_t i = 0; i < nEvents; i++) index[i] = i;
1151  Shuffle(index, nEvents);
1152 
1153  // loop over all training events
1154  for (Int_t i = 0; i < nEvents; i++) {
1155 
1156  const Event * ev = GetEvent(index[i]);
1157  if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
1158  && (Data()->GetCurrentType() == Types::kTraining)){
1159  continue;
1160  }
1161 
1162  TrainOneEvent(index[i]);
1163 
1164  // do adjustments if in batch mode
1165  if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
1166  AdjustSynapseWeights();
1167  if (fgPRINT_BATCH) {
1168  PrintNetwork();
1169  WaitForKeyboard();
1170  }
1171  }
1172 
1173  // debug in sequential mode
1174  if (fgPRINT_SEQ) {
1175  PrintNetwork();
1176  WaitForKeyboard();
1177  }
1178  }
1179 
1180  delete[] index;
1181 }
1182 
1183 ////////////////////////////////////////////////////////////////////////////////
1184 /// Input:
1185 /// - index: the array to shuffle
1186 /// - n: the size of the array
1187 /// Output:
1188 /// - index: the shuffled indexes
1189 ///
1190 /// This method is used for sequential training
1191 
1192 void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
1193 {
1194  Int_t j, k;
1195  Int_t a = n - 1;
1196  for (Int_t i = 0; i < n; i++) {
1197  j = (Int_t) (frgen->Rndm() * a);
1198  if (j<n){ // address the 'worries' of coverity
1199  k = index[j];
1200  index[j] = index[i];
1201  index[i] = k;
1202  }
1203  }
1204 }
1205 
1206 ////////////////////////////////////////////////////////////////////////////////
1207 /// decay synapse weights
1208 /// in last 10 epochs, lower learning rate even more to find a good minimum
1209 
1210 void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
1211 {
1212  TSynapse* synapse;
1213  Int_t numSynapses = fSynapses->GetEntriesFast();
1214  for (Int_t i = 0; i < numSynapses; i++) {
1215  synapse = (TSynapse*)fSynapses->At(i);
1216  if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate)); // In order to lower the learning rate even more, we need to apply sqrt instead of square.
1217  else synapse->DecayLearningRate(fDecayRate);
1218  }
1219 }
1220 
1221 ////////////////////////////////////////////////////////////////////////////////
1222 /// fast per-event training
1223 
1224 void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
1225 {
1226  GetEvent(ievt);
1227 
1228  // as soon as we know how to get event weights, get that here
1229 
1230  // note: the normalization of event weights will affect the choice
1231  // of learning rate, one will have to experiment to get the right value.
1232  // in general, if the "average" event weight is 1, the learning rate
1233  // should be good if set around 0.02 (a good value if all event weights are 1)
1234  Double_t eventWeight = 1.0;
1235 
1236  // get the desired output of this event
1237  Double_t desired;
1238  if (type == 0) desired = fOutput->GetMin(); // background //zjh
1239  else desired = fOutput->GetMax(); // signal //zjh
1240 
1241  // force the value for each input neuron
1242  Double_t x;
1243  TNeuron* neuron;
1244 
1245  for (UInt_t j = 0; j < GetNvar(); j++) {
1246  x = branchVar[j];
1247  if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
1248  neuron = GetInputNeuron(j);
1249  neuron->ForceValue(x);
1250  }
1251 
1252  ForceNetworkCalculations();
1253  UpdateNetwork(desired, eventWeight);
1254 }
1255 
1256 ////////////////////////////////////////////////////////////////////////////////
1257 /// train network over a single event
1258 /// this uses the new event model
1259 
1260 void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
1261 {
1262  // note: the normalization of event weights will affect the choice
1263  // of learning rate, one will have to experiment to get the right value.
1264  // in general, if the "average" event weight is 1, the learning rate
1265  // should be good if set around 0.02 (a good value if all event weights are 1)
1266 
1267  const Event * ev = GetEvent(ievt);
1268  Double_t eventWeight = ev->GetWeight();
1269  ForceNetworkInputs( ev );
1270  ForceNetworkCalculations();
1271  if (DoRegression()) UpdateNetwork( ev->GetTargets(), eventWeight );
1272  if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
1273  else UpdateNetwork( GetDesiredOutput( ev ), eventWeight );
1274 }
1275 
1276 ////////////////////////////////////////////////////////////////////////////////
1277 /// get the desired output of this event
1278 
1279 Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
1280 {
1281  return DataInfo().IsSignal(ev)?fOutput->GetMax():fOutput->GetMin(); //zjh
1282 }
1283 
1284 ////////////////////////////////////////////////////////////////////////////////
1285 /// update the network based on how closely
1286 /// the output matched the desired output
1287 
1288 void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
1289 {
1290  Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
1291  if (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired ; //zjh
1292  else if (fEstimator==kCE) error = -1./(GetOutputNeuron()->GetActivationValue() -1 + desired); //zjh
1293  else Log() << kFATAL << "Estimator type unspecified!!" << Endl; //zjh
1294  error *= eventWeight;
1295  GetOutputNeuron()->SetError(error);
1296  CalculateNeuronDeltas();
1297  UpdateSynapses();
1298 }
1299 
1300 ////////////////////////////////////////////////////////////////////////////////
1301 /// update the network based on how closely
1302 /// the output matched the desired output
1303 
1304 void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
1305 {
1306  // Norm for softmax
1307  Double_t norm = 0.;
1308  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1309  Double_t act = GetOutputNeuron(i)->GetActivationValue();
1310  norm += TMath::Exp(act);
1311  }
1312 
1313  // Get output of network, and apply softmax
1314  for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
1315  Double_t act = GetOutputNeuron(i)->GetActivationValue();
1316  Double_t output = TMath::Exp(act) / norm;
1317  Double_t error = output - desired.at(i);
1318  error *= eventWeight;
1319  GetOutputNeuron(i)->SetError(error);
1320  }
1321 
1322  // Do backpropagation
1323  CalculateNeuronDeltas();
1324  UpdateSynapses();
1325 }
1326 
1327 ////////////////////////////////////////////////////////////////////////////////
1328 /// have each neuron calculate its delta by back propagation
1329 
1330 void TMVA::MethodMLP::CalculateNeuronDeltas()
1331 {
1332  TNeuron* neuron;
1333  Int_t numNeurons;
1334  Int_t numLayers = fNetwork->GetEntriesFast();
1335  TObjArray* curLayer;
1336 
1337  // step backwards through the network (back propagation)
1338  // deltas calculated starting at output layer
1339  for (Int_t i = numLayers-1; i >= 0; i--) {
1340  curLayer = (TObjArray*)fNetwork->At(i);
1341  numNeurons = curLayer->GetEntriesFast();
1342 
1343  for (Int_t j = 0; j < numNeurons; j++) {
1344  neuron = (TNeuron*) curLayer->At(j);
1345  neuron->CalculateDelta();
1346  }
1347  }
1348 }
1349 
1350 ////////////////////////////////////////////////////////////////////////////////
1351 /// create genetics class similar to GeneticCut
1352 /// give it vector of parameter ranges (parameters = weights)
1353 /// link fitness function of this class to ComputeEstimator
1354 /// instantiate GA (see MethodCuts)
1355 /// run it
1356 /// then this should exist for GA, Minuit and random sampling
1357 
1358 void TMVA::MethodMLP::GeneticMinimize()
1359 {
1360  PrintMessage("Minimizing Estimator with GA");
1361 
1362  // define GA parameters
1363  fGA_preCalc = 1;
1364  fGA_SC_steps = 10;
1365  fGA_SC_rate = 5;
1366  fGA_SC_factor = 0.95;
1367  fGA_nsteps = 30;
1368 
1369  // ranges
1370  std::vector<Interval*> ranges;
1371 
1372  Int_t numWeights = fSynapses->GetEntriesFast();
1373  for (Int_t ivar=0; ivar< numWeights; ivar++) {
1374  ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
1375  }
1376 
1377  FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
1378  gf->Run();
1379 
1380  Double_t estimator = CalculateEstimator();
1381  Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
1382 }
1383 
1384 ////////////////////////////////////////////////////////////////////////////////
1385 /// interface to the estimate
1386 
1387 Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
1388 {
1389  return ComputeEstimator( parameters );
1390 }
1391 
1392 ////////////////////////////////////////////////////////////////////////////////
1393 /// this function is called by GeneticANN for GA optimization
1394 
1395 Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
1396 {
1397  TSynapse* synapse;
1398  Int_t numSynapses = fSynapses->GetEntriesFast();
1399 
1400  for (Int_t i = 0; i < numSynapses; i++) {
1401  synapse = (TSynapse*)fSynapses->At(i);
1402  synapse->SetWeight(parameters.at(i));
1403  }
1404  if (fUseRegulator) UpdatePriors(); //zjh
1405 
1406  Double_t estimator = CalculateEstimator();
1407 
1408  return estimator;
1409 }
1410 
1411 ////////////////////////////////////////////////////////////////////////////////
1412 /// update synapse error fields and adjust the weights (if in sequential mode)
1413 
1414 void TMVA::MethodMLP::UpdateSynapses()
1415 {
1416  TNeuron* neuron;
1417  Int_t numNeurons;
1418  TObjArray* curLayer;
1419  Int_t numLayers = fNetwork->GetEntriesFast();
1420 
1421  for (Int_t i = 0; i < numLayers; i++) {
1422  curLayer = (TObjArray*)fNetwork->At(i);
1423  numNeurons = curLayer->GetEntriesFast();
1424 
1425  for (Int_t j = 0; j < numNeurons; j++) {
1426  neuron = (TNeuron*) curLayer->At(j);
1427  if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
1428  else neuron->UpdateSynapsesSequential();
1429  }
1430  }
1431 }
1432 
1433 ////////////////////////////////////////////////////////////////////////////////
1434 /// just adjust the synapse weights (should be called in batch mode)
1435 
1436 void TMVA::MethodMLP::AdjustSynapseWeights()
1437 {
1438  TNeuron* neuron;
1439  Int_t numNeurons;
1440  TObjArray* curLayer;
1441  Int_t numLayers = fNetwork->GetEntriesFast();
1442 
1443  for (Int_t i = numLayers-1; i >= 0; i--) {
1444  curLayer = (TObjArray*)fNetwork->At(i);
1445  numNeurons = curLayer->GetEntriesFast();
1446 
1447  for (Int_t j = 0; j < numNeurons; j++) {
1448  neuron = (TNeuron*) curLayer->At(j);
1449  neuron->AdjustSynapseWeights();
1450  }
1451  }
1452 }
1453 
1454 ////////////////////////////////////////////////////////////////////////////////
1455 
1456 void TMVA::MethodMLP::UpdatePriors() //zjh
1457 {
1458  fPrior=0;
1459  fPriorDev.clear();
1460  Int_t nSynapses = fSynapses->GetEntriesFast();
1461  for (Int_t i=0;i<nSynapses;i++) {
1462  TSynapse* synapse = (TSynapse*)fSynapses->At(i);
1463  fPrior+=0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
1464  fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
1465  }
1466 }
1467 
1468 ////////////////////////////////////////////////////////////////////////////////
1469 
1470 void TMVA::MethodMLP::UpdateRegulators() //zjh
1471 {
1472  TMatrixD InvH(0,0);
1473  GetApproxInvHessian(InvH);
1474  Int_t numSynapses=fSynapses->GetEntriesFast();
1475  Int_t numRegulators=fRegulators.size();
1476  Float_t gamma=0,
1477  variance=1.; // Gaussian noise
1478  std::vector<Int_t> nWDP(numRegulators);
1479  std::vector<Double_t> trace(numRegulators),weightSum(numRegulators);
1480  for (int i=0;i<numSynapses;i++) {
1481  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1482  Int_t idx=fRegulatorIdx[i];
1483  nWDP[idx]++;
1484  trace[idx]+=InvH[i][i];
1485  gamma+=1-fRegulators[idx]*InvH[i][i];
1486  weightSum[idx]+=(synapses->GetWeight())*(synapses->GetWeight());
1487  }
1488  if (fEstimator==kMSE) {
1489  if (GetNEvents()>gamma) variance=CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
1490  else variance=CalculateEstimator( Types::kTraining, 0 );
1491  }
1492 
1493  //Log() << kDEBUG << Endl;
1494  for (int i=0;i<numRegulators;i++)
1495  {
1496  //fRegulators[i]=variance*(nWDP[i]-fRegulators[i]*trace[i])/weightSum[i];
1497  fRegulators[i]=variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
1498  if (fRegulators[i]<0) fRegulators[i]=0;
1499  Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
1500  }
1501  float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample //zjh
1502  float testE = CalculateEstimator( Types::kTesting, 0 ) ; // estimator for test sample //zjh
1503 
1504  Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
1505 
1506 }
1507 
1508 ////////////////////////////////////////////////////////////////////////////////
1509 
1510 void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate) //zjh
1511 {
1512  Int_t numSynapses=fSynapses->GetEntriesFast();
1513  InvHessian.ResizeTo( numSynapses, numSynapses );
1514  InvHessian=0;
1515  TMatrixD sens(numSynapses,1);
1516  TMatrixD sensT(1,numSynapses);
1517  Int_t nEvents = GetNEvents();
1518  for (Int_t i=0;i<nEvents;i++) {
1519  GetEvent(i);
1520  double outputValue=GetMvaValue(); // force calculation
1521  GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1522  CalculateNeuronDeltas();
1523  for (Int_t j = 0; j < numSynapses; j++){
1524  TSynapse* synapses = (TSynapse*)fSynapses->At(j);
1525  synapses->InitDelta();
1526  synapses->CalculateDelta();
1527  sens[j][0]=sensT[0][j]=synapses->GetDelta();
1528  }
1529  if (fEstimator==kMSE ) InvHessian+=sens*sensT;
1530  else if (fEstimator==kCE) InvHessian+=(outputValue*(1-outputValue))*sens*sensT;
1531  }
1532 
1533  // TVectorD eValue(numSynapses);
1534  if (regulate) {
1535  for (Int_t i = 0; i < numSynapses; i++){
1536  InvHessian[i][i]+=fRegulators[fRegulatorIdx[i]];
1537  }
1538  }
1539  else {
1540  for (Int_t i = 0; i < numSynapses; i++){
1541  InvHessian[i][i]+=1e-6; //to avoid precision problem that will destroy the pos-def
1542  }
1543  }
1544 
1545  InvHessian.Invert();
1546 
1547 }
1548 
1549 ////////////////////////////////////////////////////////////////////////////////
1550 
1551 Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper )
1552 {
1553  Double_t MvaValue = MethodANNBase::GetMvaValue();// contains back propagation
1554 
1555  // no hessian (old training file) or no error requested
1556  if (!fCalculateErrors || errLower==0 || errUpper==0)
1557  return MvaValue;
1558 
1559  Double_t MvaUpper,MvaLower,median,variance;
1560  Int_t numSynapses=fSynapses->GetEntriesFast();
1561  if (fInvHessian.GetNcols()!=numSynapses) {
1562  Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
1563  }
1564  TMatrixD sens(numSynapses,1);
1565  TMatrixD sensT(1,numSynapses);
1566  GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
1567  //GetOutputNeuron()->SetError(1.);
1568  CalculateNeuronDeltas();
1569  for (Int_t i = 0; i < numSynapses; i++){
1570  TSynapse* synapses = (TSynapse*)fSynapses->At(i);
1571  synapses->InitDelta();
1572  synapses->CalculateDelta();
1573  sensT[0][i]=synapses->GetDelta();
1574  }
1575  sens.Transpose(sensT);
1576  TMatrixD sig=sensT*fInvHessian*sens;
1577  variance=sig[0][0];
1578  median=GetOutputNeuron()->GetValue();
1579 
1580  if (variance<0) {
1581  Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
1582  variance=0;
1583  }
1584  variance=sqrt(variance);
1585 
1586  //upper
1587  MvaUpper=fOutput->Eval(median+variance);
1588  if(errUpper)
1589  *errUpper=MvaUpper-MvaValue;
1590 
1591  //lower
1592  MvaLower=fOutput->Eval(median-variance);
1593  if(errLower)
1594  *errLower=MvaValue-MvaLower;
1595 
1596  return MvaValue;
1597 }
1598 
1599 
1600 #ifdef MethodMLP_UseMinuit__
1601 
1602 ////////////////////////////////////////////////////////////////////////////////
1603 /// minimize using Minuit
1604 
1605 void TMVA::MethodMLP::MinuitMinimize()
1606 {
1607  fNumberOfWeights = fSynapses->GetEntriesFast();
1608 
1609  TFitter* tfitter = new TFitter( fNumberOfWeights );
1610 
1611  // minuit-specific settings
1612  Double_t args[10];
1613 
1614  // output level
1615  args[0] = 2; // put to 0 for results only, or to -1 for no garbage
1616  tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
1617  tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );
1618 
1619  double w[54];
1620 
1621  // init parameters
1622  for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
1623  TString parName = Form("w%i", ipar);
1624  tfitter->SetParameter( ipar,
1625  parName, w[ipar], 0.1, 0, 0 );
1626  }
1627 
1628  // define the CFN function
1629  tfitter->SetFCN( &IFCN );
1630 
1631  // define fit strategy
1632  args[0] = 2;
1633  tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );
1634 
1635  // now do the fit !
1636  args[0] = 1e-04;
1637  tfitter->ExecuteCommand( "MIGRAD", args, 1 );
1638 
1639  Bool_t doBetter = kFALSE;
1640  Bool_t doEvenBetter = kFALSE;
1641  if (doBetter) {
1642  args[0] = 1e-04;
1643  tfitter->ExecuteCommand( "IMPROVE", args, 1 );
1644 
1645  if (doEvenBetter) {
1646  args[0] = 500;
1647  tfitter->ExecuteCommand( "MINOS", args, 1 );
1648  }
1649  }
1650 }
1651 
1652 ////////////////////////////////////////////////////////////////////////////////
1653 /// Evaluate the minimisation function
1654 ///
1655 /// Input parameters:
1656 /// - npars: number of currently variable parameters
1657 /// CAUTION: this is not (necessarily) the dimension of the fitPars vector !
1658 /// - fitPars: array of (constant and variable) parameters
1659 /// - iflag: indicates what is to be calculated (see example below)
1660 /// - grad: array of gradients
1661 ///
1662 /// Output parameters:
1663 /// - f: the calculated function value.
1664 /// - grad: the (optional) vector of first derivatives).
1665 
1666 void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1667 {
1668  ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
1669 }
1670 
1671 TTHREAD_TLS(Int_t) nc = 0;
1672 TTHREAD_TLS(double) minf = 1000000;
1673 
1674 void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
1675 {
1676  // first update the weights
1677  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
1678  TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
1679  synapse->SetWeight(fitPars[ipar]);
1680  }
1681 
1682  // now compute the estimator
1683  f = CalculateEstimator();
1684 
1685  nc++;
1686  if (f < minf) minf = f;
1687  for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
1688  Log() << kDEBUG << Endl;
1689  Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
1690 }
1691 
1692 ////////////////////////////////////////////////////////////////////////////////
1693 /// global "this" pointer to be used in minuit
1694 
1695 TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
1696 {
1697  return fgThis;
1698 }
1699 
1700 #endif
1701 
1702 
1703 ////////////////////////////////////////////////////////////////////////////////
1704 /// write specific classifier response
1705 
1706 void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1707 {
1708  MethodANNBase::MakeClassSpecific(fout, className);
1709 }
1710 
1711 ////////////////////////////////////////////////////////////////////////////////
1712 /// get help message text
1713 ///
1714 /// typical length of text line:
1715 /// "|--------------------------------------------------------------|"
1716 
1717 void TMVA::MethodMLP::GetHelpMessage() const
1718 {
1719  TString col = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
1720  TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");
1721 
1722  Log() << Endl;
1723  Log() << col << "--- Short description:" << colres << Endl;
1724  Log() << Endl;
1725  Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
1726  Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
1727  Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
1728  Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
1729  Log() << "signal and one background). " << Endl;
1730  Log() << Endl;
1731  Log() << col << "--- Performance optimisation:" << colres << Endl;
1732  Log() << Endl;
1733  Log() << "Neural networks are stable and performing for a large variety of " << Endl;
1734  Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
1735  Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
1736  Log() << "number of input variables that have only little discrimination power. " << Endl;
1737  Log() << "" << Endl;
1738  Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
1739  Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
1740  Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
1741  Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
1742  Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
1743  Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
1744  Log() << "competitive results)." << Endl;
1745  Log() << Endl;
1746  Log() << col << "Overtraining: " << colres
1747  << "only the TMlpANN performs an explicit separation of the" << Endl;
1748  Log() << "full training sample into independent training and validation samples." << Endl;
1749  Log() << "We have found that in most high-energy physics applications the " << Endl;
1750  Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
1751  Log() << "constrain the weights of the relatively simple architectures required" << Endl;
1752  Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
1753  Log() << "the use of validation samples would only reduce the available training" << Endl;
1754  Log() << "information. However, if the performance on the training sample is " << Endl;
1755  Log() << "found to be significantly better than the one found with the inde-" << Endl;
1756  Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
1757  Log() << "are printed to standard output at the end of each training job." << Endl;
1758  Log() << Endl;
1759  Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
1760  Log() << Endl;
1761  Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
1762  Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
1763  Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
1764  Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
1765  Log() << "in favour of more neurons in the first hidden layer." << Endl;
1766  Log() << "" << Endl;
1767  Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
1768  Log() << "adjustable weights is small compared to the training sample size," << Endl;
1769  Log() << "using a large number of training samples should not lead to overtraining." << Endl;
1770 }
1771