#ifdef MethodMLP_UseMinuit__
TMVA::MethodMLP* TMVA::MethodMLP::fgThis = 0;
Bool_t MethodMLP_UseMinuit = kTRUE;
#endif

ClassImp(TMVA::MethodMLP);
TMVA::MethodMLP::MethodMLP( const TString& jobName,
                            const TString& methodTitle,
                            DataSetInfo& theData,
                            const TString& theOption )
   : MethodANNBase( jobName, Types::kMLP, methodTitle, theData, theOption ),
     fUseRegulator(false), fCalculateErrors(false),
     fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
     fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
     fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
     fSamplingTraining(false), fSamplingTesting(false),
     fLastAlpha(0.0), fTau(0.),
     fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
     fBPMode(kSequential), fBpModeS("None"),
     fBatchSize(0), fTestRate(0), fEpochMon(false),
     fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
     fGA_SC_rate(0), fGA_SC_factor(0.0),
     fDeviationsFromTargets(0),
     fWeightRange(1.0)
{
}
TMVA::MethodMLP::MethodMLP( DataSetInfo& theData,
                            const TString& theWeightFile )
   : MethodANNBase( Types::kMLP, theData, theWeightFile ),
     fUseRegulator(false), fCalculateErrors(false),
     fPrior(0.0), fPriorDev(0), fUpdateLimit(0),
     fTrainingMethod(kBFGS), fTrainMethodS("BFGS"),
     fSamplingFraction(1.0), fSamplingEpoch(0.0), fSamplingWeight(0.0),
     fSamplingTraining(false), fSamplingTesting(false),
     fLastAlpha(0.0), fTau(0.),
     fResetStep(0), fLearnRate(0.0), fDecayRate(0.0),
     fBPMode(kSequential), fBpModeS("None"),
     fBatchSize(0), fTestRate(0), fEpochMon(false),
     fGA_nsteps(0), fGA_preCalc(0), fGA_SC_steps(0),
     fGA_SC_rate(0), fGA_SC_factor(0.0),
     fDeviationsFromTargets(0),
     fWeightRange(1.0)
{
}
TMVA::MethodMLP::~MethodMLP()
{
}

void TMVA::MethodMLP::Train()
{
   // delegate to the fixed-cycle training
   Train(NumCycles());
}
Bool_t TMVA::MethodMLP::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t )
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   if (type == Types::kMulticlass)                           return kTRUE;
   if (type == Types::kRegression)                           return kTRUE;

   return kFALSE;
}
void TMVA::MethodMLP::Init()
{
   // default initializations
   SetSignalReferenceCut( 0.5 );
#ifdef MethodMLP_UseMinuit__
   fgThis = this;
#endif
}
void TMVA::MethodMLP::DeclareOptions()
{
   DeclareOptionRef(fTrainMethodS="BP", "TrainingMethod",
                    "Train with Back-Propagation (BP), BFGS Algorithm (BFGS), or Genetic Algorithm (GA - slower and worse)");
   AddPreDefVal(TString("BP"));
   AddPreDefVal(TString("GA"));
   AddPreDefVal(TString("BFGS"));

   DeclareOptionRef(fLearnRate=0.02,    "LearningRate",    "ANN learning rate parameter");
   DeclareOptionRef(fDecayRate=0.01,    "DecayRate",       "Decay rate for learning parameter");
   DeclareOptionRef(fTestRate =10,      "TestRate",        "Test for overtraining performed at each #th epoch");
   DeclareOptionRef(fEpochMon = kFALSE, "EpochMonitoring", "Provide epoch-wise monitoring plots according to TestRate (caution: causes big ROOT output file!)" );

   DeclareOptionRef(fSamplingFraction=1.0, "Sampling",           "Only 'Sampling' (randomly selected) events are trained in each epoch");
   DeclareOptionRef(fSamplingEpoch=1.0,    "SamplingEpoch",      "Sampling is used for the first 'SamplingEpoch' fraction of epochs; afterwards, all events are taken for training");
   DeclareOptionRef(fSamplingWeight=1.0,   "SamplingImportance", "The sampling weights of events in epochs which are successful (worse estimator than before) are multiplied by SamplingImportance, otherwise they are divided by it.");

   DeclareOptionRef(fSamplingTraining=kTRUE,  "SamplingTraining", "The training sample is sampled");
   DeclareOptionRef(fSamplingTesting= kFALSE, "SamplingTesting" , "The testing sample is sampled");

   DeclareOptionRef(fResetStep=50,  "ResetStep", "How often BFGS should reset history");
   DeclareOptionRef(fTau      =3.0, "Tau",       "LineSearch \"step size\"");

   DeclareOptionRef(fBpModeS="sequential", "BPMode",
                    "Back-propagation learning mode: sequential or batch");
   AddPreDefVal(TString("sequential"));
   AddPreDefVal(TString("batch"));

   DeclareOptionRef(fBatchSize=-1, "BatchSize",
                    "Batch size: number of events/batch, only set if in Batch Mode, -1 for BatchSize=number_of_events");

   DeclareOptionRef(fImprovement=1e-30, "ConvergenceImprove",
                    "Minimum improvement which counts as improvement (<0 means automatic convergence check is turned off)");

   DeclareOptionRef(fSteps=-1, "ConvergenceTests",
                    "Number of steps (without improvement) required for convergence (<0 means automatic convergence check is turned off)");

   DeclareOptionRef(fUseRegulator=kFALSE, "UseRegulator",
                    "Use regulator to avoid over-training");
   DeclareOptionRef(fUpdateLimit=10000, "UpdateLimit",
                    "Maximum number of regulator updates");
   DeclareOptionRef(fCalculateErrors=kFALSE, "CalculateErrors",
                    "Calculates inverse Hessian matrix at the end of the training to be able to calculate the uncertainties of an MVA value");

   DeclareOptionRef(fWeightRange=1.0, "WeightRange",
                    "Take the events for the estimator calculations from small deviations from the desired value to large deviations only over the weight range");
}
void TMVA::MethodMLP::ProcessOptions()
{
   MethodANNBase::ProcessOptions();

   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kINFO
            << "Will ignore negative events in training!"
            << Endl;
   }

   if      (fTrainMethodS == "BP"  ) fTrainingMethod = kBP;
   else if (fTrainMethodS == "BFGS") fTrainingMethod = kBFGS;
   else if (fTrainMethodS == "GA"  ) fTrainingMethod = kGA;

   if      (fBpModeS == "sequential") fBPMode = kSequential;
   else if (fBpModeS == "batch")      fBPMode = kBatch;

   if (fBPMode == kBatch) {
      Data()->SetCurrentType(Types::kTraining);
      Int_t numEvents = Data()->GetNEvents();
      if (fBatchSize < 1 || fBatchSize > numEvents) fBatchSize = numEvents;
   }
}
void TMVA::MethodMLP::InitializeLearningRates()
{
   Log() << kDEBUG << "Initialize learning rates" << Endl;
   TSynapse *synapse;
   Int_t numSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
      synapse->SetLearningRate(fLearnRate);
   }
}
Double_t TMVA::MethodMLP::CalculateEstimator( Types::ETreeType treeType, Int_t iEpoch )
{
   if (treeType!=Types::kTraining && treeType!=Types::kTesting) {
      Log() << kFATAL << "<CalculateEstimator> fatal error: wrong tree type: " << treeType << Endl;
   }

   Types::ETreeType saveType = Data()->GetCurrentType();
   Data()->SetCurrentType(treeType);

   // monitoring histograms for this epoch
   TString type  = (treeType == Types::kTraining ? "train" : "test");
   TString name  = Form( "convergencetest___mlp_%s_epoch_%04i", type.Data(), iEpoch );
   TString nameB = name + "_B";
   TString nameS = name + "_S";
   Int_t   nbin  = 100;
   Float_t limit = 2;
   TH1*    histS = 0;
   TH1*    histB = 0;
   if (fEpochMon && iEpoch >= 0 && !DoRegression()) {
      histS = new TH1F( nameS, nameS, nbin, -limit, limit );
      histB = new TH1F( nameB, nameB, nbin, -limit, limit );
   }

   Double_t estimator = 0;

   // loop over all events
   Int_t  nEvents  = GetNEvents();
   UInt_t nClasses = DataInfo().GetNClasses();
   UInt_t nTgts    = DataInfo().GetNTargets();

   Float_t sumOfWeights = 0.f;
   if( fWeightRange < 1.f ){
      fDeviationsFromTargets = new std::vector<std::pair<Float_t,Float_t> >(nEvents);
   }

   for (Int_t i = 0; i < nEvents; i++) {

      const Event* ev = GetEvent(i);

      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
          && (saveType == Types::kTraining)){
         continue;
      }

      Double_t w = ev->GetWeight();

      ForceNetworkInputs( ev );
      ForceNetworkCalculations();

      Double_t d = 0, v = 0;
      if (DoRegression()) {
         for (UInt_t itgt = 0; itgt < nTgts; itgt++) {
            v = GetOutputNeuron( itgt )->GetActivationValue();
            Double_t targetValue = ev->GetTarget( itgt );
            Double_t dt = v - targetValue;
            d += (dt*dt);
         }
      }
      else if (DoMulticlass()) {
         UInt_t cls = ev->GetClass();
         if (fEstimator==kCE){
            Double_t norm(0);
            for (UInt_t icls = 0; icls < nClasses; icls++) {
               Float_t activationValue = GetOutputNeuron( icls )->GetActivationValue();
               norm += exp( activationValue );
               if (icls==cls)
                  d = exp( activationValue );
            }
            d = -TMath::Log(d/norm);
         }
         else {
            for (UInt_t icls = 0; icls < nClasses; icls++) {
               Double_t desired = (icls==cls) ? 1.0 : 0.0;
               v = GetOutputNeuron( icls )->GetActivationValue();
               d = (desired-v)*(desired-v);
            }
         }
      }
      else {
         Double_t desired = DataInfo().IsSignal(ev) ? 1. : 0.;
         v = GetOutputNeuron()->GetActivationValue();
         if      (fEstimator==kMSE) d = (desired-v)*(desired-v);
         else if (fEstimator==kCE)  d = -2*(desired*TMath::Log(v)+(1-desired)*TMath::Log(1-v));
      }
      sumOfWeights += w;

      if( fDeviationsFromTargets )
         fDeviationsFromTargets->push_back(std::pair<Float_t,Float_t>(d,w));
      else
         estimator += d*w;

      if      (DataInfo().IsSignal(ev) && histS != 0) histS->Fill( float(v), float(w) );
      else if (histB != 0)                            histB->Fill( float(v), float(w) );
   }

   if( fDeviationsFromTargets ) {
      std::sort(fDeviationsFromTargets->begin(),fDeviationsFromTargets->end());

      Float_t sumOfWeightsInRange = fWeightRange*sumOfWeights;

      Float_t weightRangeCut = fWeightRange*sumOfWeights;
      Float_t weightSum      = 0.f;
      for(std::vector<std::pair<Float_t,Float_t> >::iterator itDev = fDeviationsFromTargets->begin(), itDevEnd = fDeviationsFromTargets->end(); itDev != itDevEnd; ++itDev ){
         float deviation = (*itDev).first;
         float devWeight = (*itDev).second;
         weightSum += devWeight;
         if( weightSum <= weightRangeCut ) {
            estimator += devWeight*deviation;
         }
      }

      sumOfWeights = sumOfWeightsInRange;
      delete fDeviationsFromTargets;
      fDeviationsFromTargets = 0;
   }

   if (histS != 0) fEpochMonHistS.push_back( histS );
   if (histB != 0) fEpochMonHistB.push_back( histB );

   estimator = estimator/Float_t(sumOfWeights);

   Data()->SetCurrentType( saveType );

   // provide epoch-wise monitoring
   if (fEpochMon && iEpoch >= 0 && !DoRegression() && treeType == Types::kTraining) {
      CreateWeightMonitoringHists( Form("epochmonitoring___epoch_%04i_weights_hist", iEpoch), &fEpochMonHistW );
   }

   return estimator;
}
void TMVA::MethodMLP::Train(Int_t nEpochs)
{
   if (fNetwork == 0) {
      Log() << kFATAL << "ANN Network is not initialized, doing it now!"<< Endl;
      SetAnalysisType(GetAnalysisType());
   }
   Log() << kDEBUG << "reinitialize learning rates" << Endl;
   InitializeLearningRates();

   PrintMessage("Training Network");

   Int_t nEvents   = GetNEvents();
   Int_t nSynapses = fSynapses->GetEntriesFast();
   if (nSynapses>nEvents)
      Log()<<kWARNING<<"ANN too complicated: #events="<<nEvents<<"\t#synapses="<<nSynapses<<Endl;

   fIPyMaxIter = nEpochs;
   if (fInteractive && fInteractive->NotInitialized()){
      std::vector<TString> titles = {"Error on training set", "Error on test set"};
      fInteractive->Init(titles);
   }

#ifdef MethodMLP_UseMinuit__
   if (useMinuit) MinuitMinimize();
#else
   if      (fTrainingMethod == kGA)   GeneticMinimize();
   else if (fTrainingMethod == kBFGS) BFGSMinimize(nEpochs);
   else                               BackPropagationMinimize(nEpochs);
#endif

   float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample
   float testE  = CalculateEstimator( Types::kTesting,  0 ) ; // estimator for test sample
   if (fUseRegulator) {
      Log()<<kINFO<<"Finalizing handling of Regulator terms, trainE="<<trainE<<" testE="<<testE<<Endl;
      UpdateRegulators();
      Log()<<kINFO<<"Done with handling of Regulator terms"<<Endl;
   }

   if( fCalculateErrors || fUseRegulator )
   {
      Int_t numSynapses = fSynapses->GetEntriesFast();
      fInvHessian.ResizeTo(numSynapses,numSynapses);
      GetApproxInvHessian( fInvHessian, false);
   }
}
void TMVA::MethodMLP::BFGSMinimize( Int_t nEpochs )
{
   Timer timer( (fSteps>0?100:nEpochs), GetName() );

   // create histograms for overtraining monitoring
   Int_t nbinTest = Int_t(nEpochs/fTestRate);

   fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
   fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );

   Int_t nSynapses = fSynapses->GetEntriesFast();
   Int_t nWeights  = nSynapses;

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse* synapse = (TSynapse*)fSynapses->At(i);
      synapse->SetDEDw(0.0);
   }

   std::vector<Double_t> buffer( nWeights );
   for (Int_t i=0;i<nWeights;i++) buffer[i] = 0.;

   TMatrixD Dir     ( nWeights, 1 );
   TMatrixD Hessian ( nWeights, nWeights );
   TMatrixD Gamma   ( nWeights, 1 );
   TMatrixD Delta   ( nWeights, 1 );

   Int_t    RegUpdateCD    = 0;
   Int_t    RegUpdateTimes = 0;
   Double_t AccuError      = 0;

   Double_t trainE = -1;
   Double_t testE  = -1;

   if (fSamplingTraining || fSamplingTesting)
      Data()->InitSampling(1.0,1.0,fRandomSeed);

   if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
   timer.DrawProgressBar( 0 );

   // start training cycles (epochs)
   for (Int_t i = 0; i < nEpochs; i++) {

      if (fExitFromTraining) break;
      fIPyCurrentIter = i;
      if (Float_t(i)/nEpochs < fSamplingEpoch) {
         if ((i+1)%fTestRate == 0 || (i == 0)) {
            if (fSamplingTraining) {
               Data()->SetCurrentType( Types::kTraining );
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
            if (fSamplingTesting) {
               Data()->SetCurrentType( Types::kTesting );
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
         }
      }
      else {
         Data()->SetCurrentType( Types::kTraining );
         Data()->InitSampling(1.0,1.0);
         Data()->SetCurrentType( Types::kTesting );
         Data()->InitSampling(1.0,1.0);
      }
      Data()->SetCurrentType( Types::kTraining );

      SetGammaDelta( Gamma, Delta, buffer );

      if (i % fResetStep == 0 && i<0.5*nEpochs) {
         SteepestDir( Dir );
         Hessian.UnitMatrix();
         RegUpdateCD = 0;
      }
      else {
         if (GetHessian( Hessian, Gamma, Delta )) {
            SteepestDir( Dir );
            Hessian.UnitMatrix();
            RegUpdateCD = 0;
         }
         else SetDir( Hessian, Dir );
      }

      Double_t dError = 0;
      if (DerivDir( Dir ) > 0) {
         SteepestDir( Dir );
         Hessian.UnitMatrix();
         RegUpdateCD = 0;
      }
      if (LineSearch( Dir, buffer, &dError )) {
         Hessian.UnitMatrix();
         SteepestDir( Dir );
         if (LineSearch(Dir, buffer, &dError)) {
            RegUpdateCD = 0;
            Log() << kFATAL << "Line search failed! Huge troubles somewhere..." << Endl;
         }
      }

      if (dError<0) Log()<<kWARNING<<"\nnegative dError=" <<dError<<Endl;
      AccuError += dError;
      RegUpdateCD++;

      if ( fUseRegulator && RegUpdateTimes<fUpdateLimit && RegUpdateCD>=5 && fabs(dError)<0.1*AccuError) {
         Log()<<kDEBUG<<"\n\nUpdate regulators "<<RegUpdateTimes<<" on epoch "<<i<<"\tdError="<<dError<<Endl;
         UpdateRegulators();
         Hessian.UnitMatrix();
         RegUpdateCD = 0;
         RegUpdateTimes++;
         AccuError = 0;
      }

      // monitor convergence of training and control sample
      if ((i+1)%fTestRate == 0) {
         trainE = CalculateEstimator( Types::kTraining, i ) ; // estimator for training sample
         testE  = CalculateEstimator( Types::kTesting,  i ) ; // estimator for test sample
         if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
         fEstimatorHistTrain->Fill( i+1, trainE );
         fEstimatorHistTest ->Fill( i+1, testE );

         Bool_t success = kFALSE;
         if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
            success = kTRUE;
         }
         Data()->EventResult( success );

         SetCurrentValue( testE );
         if (HasConverged()) {
            if (Float_t(i)/nEpochs < fSamplingEpoch) {
               Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
               i = newEpoch;
               ResetConvergenceCounter();
            }
            else break;
         }
      }

      // draw progress bar (add convergence value)
      TString convText = Form( "<D^2> (train/test/epoch): %.4g/%.4g/%d", trainE, testE, i );
      if (fSteps > 0) {
         Float_t progress = 0;
         if (Float_t(i)/nEpochs < fSamplingEpoch)
            progress = Progress()*fSamplingFraction*100*fSamplingEpoch;
         else
            progress = 100.0*(fSamplingFraction*fSamplingEpoch+(1.0-fSamplingEpoch)*Progress());
         Float_t progress2 = 100.0*RegUpdateTimes/fUpdateLimit;
         if (progress2>progress) progress=progress2;
         timer.DrawProgressBar( Int_t(progress), convText );
      }
      else {
         Int_t progress = Int_t(nEpochs*RegUpdateTimes/Float_t(fUpdateLimit));
         if (progress<i) progress=i;
         timer.DrawProgressBar( progress, convText );
      }
   }
}
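// One BFGS epoch above therefore consists of: (1) SetGammaDelta to obtain the gradient
// change Gamma and the weight step Delta, (2) GetHessian/SetDir to update the
// inverse-Hessian approximation and turn the gradient into a search direction (falling
// back to SteepestDir whenever the update or the directional derivative is unusable),
// and (3) LineSearch to pick the step length along that direction.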
void TMVA::MethodMLP::SetGammaDelta( TMatrixD &Gamma, TMatrixD &Delta, std::vector<Double_t> &buffer )
{
   Int_t nWeights = fSynapses->GetEntriesFast();

   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Gamma[IDX++][0] = -synapse->GetDEDw();
   }

   for (Int_t i=0;i<nWeights;i++) Delta[i][0] = buffer[i];

   ComputeDEDw();

   IDX = 0;
   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Gamma[IDX++][0] += synapse->GetDEDw();
   }
}
void TMVA::MethodMLP::ComputeDEDw()
{
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      synapse->SetDEDw( 0.0 );
   }

   Int_t nEvents    = GetNEvents();
   Int_t nPosEvents = nEvents;
   for (Int_t i=0;i<nEvents;i++) {

      const Event* ev = GetEvent(i);
      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
          && (Data()->GetCurrentType() == Types::kTraining)){
         --nPosEvents;
         continue;
      }

      SimulateEvent( ev );

      for (Int_t j=0;j<nSynapses;j++) {
         TSynapse *synapse = (TSynapse*)fSynapses->At(j);
         synapse->SetDEDw( synapse->GetDEDw() + synapse->GetDelta() );
      }
   }

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Double_t DEDw = synapse->GetDEDw();
      if (fUseRegulator) DEDw += fPriorDev[i];
      synapse->SetDEDw( DEDw / nPosEvents );
   }
}
void TMVA::MethodMLP::SimulateEvent( const Event* ev )
{
   Double_t eventWeight = ev->GetWeight();

   ForceNetworkInputs( ev );
   ForceNetworkCalculations();

   if (DoRegression()) {
      UInt_t ntgt = DataInfo().GetNTargets();
      for (UInt_t itgt = 0; itgt < ntgt; itgt++) {
         Double_t desired = ev->GetTarget(itgt);
         Double_t error = ( GetOutputNeuron( itgt )->GetActivationValue() - desired )*eventWeight;
         GetOutputNeuron( itgt )->SetError(error);
      }
   }
   else if (DoMulticlass()) {
      UInt_t nClasses = DataInfo().GetNClasses();
      UInt_t cls      = ev->GetClass();
      for (UInt_t icls = 0; icls < nClasses; icls++) {
         Double_t desired = ( cls==icls ? 1.0 : 0.0 );
         Double_t error = ( GetOutputNeuron( icls )->GetActivationValue() - desired )*eventWeight;
         GetOutputNeuron( icls )->SetError(error);
      }
   }
   else {
      Double_t desired = GetDesiredOutput( ev );
      Double_t error = 0.;
      if      (fEstimator==kMSE) error = ( GetOutputNeuron()->GetActivationValue() - desired )*eventWeight;
      else if (fEstimator==kCE)  error = -eventWeight/(GetOutputNeuron()->GetActivationValue() -1 + desired);
      GetOutputNeuron()->SetError(error);
   }

   CalculateNeuronDeltas();
   for (Int_t j=0;j<fSynapses->GetEntriesFast();j++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(j);
      synapse->InitDelta();
      synapse->CalculateDelta();
   }
}
void TMVA::MethodMLP::SteepestDir( TMatrixD &Dir )
{
   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Dir[IDX++][0] = -synapse->GetDEDw();
   }
}
Bool_t TMVA::MethodMLP::GetHessian( TMatrixD &Hessian, TMatrixD &Gamma, TMatrixD &Delta )
{
   TMatrixD gd(Gamma, TMatrixD::kTransposeMult, Delta);
   if ((Double_t) gd[0][0] == 0.) return kTRUE;
   TMatrixD aHg(Hessian, TMatrixD::kMult, Gamma);
   TMatrixD tmp(Gamma, TMatrixD::kTransposeMult, Hessian);
   TMatrixD gHg(Gamma, TMatrixD::kTransposeMult, aHg);
   Double_t a = 1 / (Double_t) gd[0][0];
   Double_t f = 1 + ((Double_t)gHg[0][0]*a);
   TMatrixD res( TMatrixD(Delta, TMatrixD::kMult, TMatrixD(TMatrixD::kTransposed,Delta)) );
   res *= f;
   res -= (TMatrixD(Delta, TMatrixD::kMult, tmp) + TMatrixD(aHg, TMatrixD::kMult,
                                                            TMatrixD(TMatrixD::kTransposed,Delta)));
   res *= a;
   Hessian += res;

   return kFALSE;
}
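// The update assembled above corresponds to the usual rank-two correction of the
// inverse-Hessian approximation,
//    H <- H + [ (1 + g^T H g / (g^T d)) * d d^T  -  d (g^T H)  -  (H g) d^T ] / (g^T d) ,
// with d = Delta (weight step) and g = Gamma (gradient change). A vanishing curvature
// term g^T d makes the function return kTRUE, and the caller falls back to steepest descent.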
void TMVA::MethodMLP::SetDir( TMatrixD &Hessian, TMatrixD &dir )
{
   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();
   TMatrixD DEDw(nSynapses, 1);

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      DEDw[IDX++][0] = synapse->GetDEDw();
   }

   dir = Hessian * DEDw;
   for (Int_t i=0;i<IDX;i++) dir[i][0] = -dir[i][0];
}
Double_t TMVA::MethodMLP::DerivDir( TMatrixD &Dir )
{
   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();
   Double_t Result = 0.0;

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Result += Dir[IDX++][0] * synapse->GetDEDw();
   }
   return Result;
}
Bool_t TMVA::MethodMLP::LineSearch(TMatrixD &Dir, std::vector<Double_t> &buffer, Double_t* dError)
{
   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();
   Int_t nWeights  = nSynapses;

   std::vector<Double_t> Origin(nWeights);
   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      Origin[i] = synapse->GetWeight();
   }

   Double_t err1 = GetError();
   Double_t errOrigin = err1;
   Double_t alpha1 = 0.;
   Double_t alpha2 = fLastAlpha;

   // keep the initial step within a sensible range
   if      (alpha2 < 0.01) alpha2 = 0.01;
   else if (alpha2 > 2.0)  alpha2 = 2.0;
   Double_t alpha_original = alpha2;
   Double_t alpha3 = alpha2;

   SetDirWeights( Origin, Dir, alpha2 );
   Double_t err2 = GetError();
   Double_t err3 = err2;
   Bool_t bingo = kFALSE;

   if (err1 > err2) {
      // the step decreases the error: enlarge it until the error rises again
      for (Int_t i=0;i<100;i++) {
         alpha3 *= fTau;
         SetDirWeights(Origin, Dir, alpha3);
         err3 = GetError();
         if (err3 > err2) {
            bingo = kTRUE;
            break;
         }
         alpha1 = alpha2;
         err1   = err2;
         alpha2 = alpha3;
         err2   = err3;
      }
      if (!bingo) {
         SetDirWeights(Origin, Dir, 0.);
         return kTRUE;
      }
   }
   else {
      // the step increases the error: shrink it, eventually try the opposite direction
      for (Int_t i=0;i<100;i++) {
         alpha2 /= fTau;
         if (i==50) {
            Log() << kWARNING << "linesearch, starting to investigate direction opposite of steepestDIR" << Endl;
            alpha2 = -alpha_original;
         }
         SetDirWeights(Origin, Dir, alpha2);
         err2 = GetError();
         if (err1 > err2) {
            bingo = kTRUE;
            break;
         }
         alpha3 = alpha2;
         err3   = err2;
      }
      if (!bingo) {
         SetDirWeights(Origin, Dir, 0.);
         Log() << kWARNING << "linesearch, failed even in opposite direction of steepestDIR" << Endl;
         fLastAlpha = 0.05;
         return kTRUE;
      }
   }

   if (alpha1>0 && alpha2>0 && alpha3 > 0) {
      fLastAlpha = 0.5 * (alpha1 + alpha3 -
                          (err3 - err1) / ((err3 - err2) / ( alpha3 - alpha2 )
                                           - ( err2 - err1 ) / (alpha2 - alpha1 )));
   }
   else {
      fLastAlpha = alpha2;
   }

   fLastAlpha = fLastAlpha < 10000 ? fLastAlpha : 10000;

   SetDirWeights(Origin, Dir, fLastAlpha);

   Double_t finalError = GetError();
   if (finalError > err1) {
      Log() << kWARNING << "Line search increased error! Something is wrong."
            << " fLastAlpha=" << fLastAlpha << " al123=" << alpha1 << " "
            << alpha2 << " " << alpha3 << " err1=" << err1 << " errfinal=" << finalError << Endl;
   }

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      buffer[IDX] = synapse->GetWeight() - Origin[IDX];
      IDX++;
   }

   if (dError) (*dError) = (errOrigin-finalError)/finalError;

   return kFALSE;
}
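// The fLastAlpha expression above is the minimum of the parabola through the three
// bracketing points (alpha1,err1), (alpha2,err2), (alpha3,err3):
//    alpha* = 0.5 * ( alpha1 + alpha3 - (err3-err1) / ( (err3-err2)/(alpha3-alpha2)
//                                                     - (err2-err1)/(alpha2-alpha1) ) )
// i.e. a standard quadratic refinement of the bracketed step length.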
void TMVA::MethodMLP::SetDirWeights( std::vector<Double_t> &Origin, TMatrixD &Dir, Double_t alpha )
{
   Int_t IDX = 0;
   Int_t nSynapses = fSynapses->GetEntriesFast();

   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse *synapse = (TSynapse*)fSynapses->At(i);
      synapse->SetWeight( Origin[IDX] + Dir[IDX][0] * alpha );
      IDX++;
   }
   if (fUseRegulator) UpdatePriors();
}
Double_t TMVA::MethodMLP::GetError()
{
   Int_t nEvents = GetNEvents();
   UInt_t ntgts  = GetNTargets();
   Double_t Result = 0.;

   for (Int_t i=0;i<nEvents;i++) {
      const Event* ev = GetEvent(i);

      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
          && (Data()->GetCurrentType() == Types::kTraining)){
         continue;
      }
      SimulateEvent( ev );

      Double_t error = 0.;
      if (DoRegression()) {
         for (UInt_t itgt = 0; itgt < ntgts; itgt++) {
            error += GetMSEErr( ev, itgt );
         }
      }
      else if ( DoMulticlass() ){
         for( UInt_t icls = 0, iclsEnd = DataInfo().GetNClasses(); icls < iclsEnd; icls++ ){
            error += GetMSEErr( ev, icls );
         }
      }
      else {
         if      (fEstimator==kMSE) error = GetMSEErr( ev );
         else if (fEstimator==kCE)  error = GetCEErr ( ev );
      }
      Result += error * ev->GetWeight();
   }

   if (fUseRegulator) Result += fPrior;
   if (Result<0) Log()<<kWARNING<<"\nNegative Error!!! :"<<Result-fPrior<<"+"<<fPrior<<Endl;
   return Result;
}
Double_t TMVA::MethodMLP::GetMSEErr( const Event* ev, UInt_t index )
{
   Double_t error  = 0;
   Double_t output = GetOutputNeuron( index )->GetActivationValue();
   Double_t target = 0;
   if      (DoRegression()) target = ev->GetTarget( index );
   else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
   else                     target = GetDesiredOutput( ev );

   error = 0.5*(output-target)*(output-target);

   return error;
}
Double_t TMVA::MethodMLP::GetCEErr( const Event* ev, UInt_t index )
{
   Double_t error  = 0;
   Double_t output = GetOutputNeuron( index )->GetActivationValue();
   Double_t target = 0;
   if      (DoRegression()) target = ev->GetTarget( index );
   else if (DoMulticlass()) target = (ev->GetClass() == index ? 1.0 : 0.0 );
   else                     target = GetDesiredOutput( ev );

   error = -(target*TMath::Log(output)+(1-target)*TMath::Log(1-output));

   return error;
}
void TMVA::MethodMLP::BackPropagationMinimize(Int_t nEpochs)
{
   Timer timer( (fSteps>0?100:nEpochs), GetName() );
   Int_t lateEpoch = (Int_t)(nEpochs*0.95) - 1;

   // create histograms for overtraining monitoring
   Int_t nbinTest = Int_t(nEpochs/fTestRate);

   fEstimatorHistTrain = new TH1F( "estimatorHistTrain", "training estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );
   fEstimatorHistTest  = new TH1F( "estimatorHistTest", "test estimator",
                                   nbinTest, Int_t(fTestRate/2), nbinTest*fTestRate+Int_t(fTestRate/2) );

   if (fSamplingTraining || fSamplingTesting)
      Data()->InitSampling(1.0,1.0,fRandomSeed);

   if (fSteps > 0) Log() << kINFO << "Inaccurate progress timing for MLP... " << Endl;
   timer.DrawProgressBar(0);

   // estimators
   Double_t trainE = -1;
   Double_t testE  = -1;

   // start training cycles (epochs)
   for (Int_t i = 0; i < nEpochs; i++) {

      if (fExitFromTraining) break;
      fIPyCurrentIter = i;
      if (Float_t(i)/nEpochs < fSamplingEpoch) {
         if ((i+1)%fTestRate == 0 || (i == 0)) {
            if (fSamplingTraining) {
               Data()->SetCurrentType( Types::kTraining );
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
            if (fSamplingTesting) {
               Data()->SetCurrentType( Types::kTesting );
               Data()->InitSampling(fSamplingFraction,fSamplingWeight);
               Data()->CreateSampling();
            }
         }
      }
      else {
         Data()->SetCurrentType( Types::kTraining );
         Data()->InitSampling(1.0,1.0);
         Data()->SetCurrentType( Types::kTesting );
         Data()->InitSampling(1.0,1.0);
      }
      Data()->SetCurrentType( Types::kTraining );

      TrainOneEpoch();
      DecaySynapseWeights(i >= lateEpoch);

      // monitor convergence of training and control sample
      if ((i+1)%fTestRate == 0) {
         trainE = CalculateEstimator( Types::kTraining, i );
         testE  = CalculateEstimator( Types::kTesting,  i );
         if (fInteractive) fInteractive->AddPoint(i+1, trainE, testE);
         fEstimatorHistTrain->Fill( i+1, trainE );
         fEstimatorHistTest ->Fill( i+1, testE );

         Bool_t success = kFALSE;
         if ((testE < GetCurrentValue()) || (GetCurrentValue()<1e-100)) {
            success = kTRUE;
         }
         Data()->EventResult( success );

         SetCurrentValue( testE );
         if (HasConverged()) {
            if (Float_t(i)/nEpochs < fSamplingEpoch) {
               Int_t newEpoch = Int_t(fSamplingEpoch*nEpochs);
               i = newEpoch;
               ResetConvergenceCounter();
            }
            else {
               if (lateEpoch > i) lateEpoch = i;
               else break;
            }
         }
      }

      // draw progress bar (add convergence value)
      TString convText = Form( "<D^2> (train/test): %.4g/%.4g", trainE, testE );
      if (fSteps > 0) {
         Float_t progress = 0;
         if (Float_t(i)/nEpochs < fSamplingEpoch)
            progress = Progress()*fSamplingEpoch*fSamplingFraction*100;
         else
            progress = 100*(fSamplingEpoch*fSamplingFraction+(1.0-fSamplingFraction*fSamplingEpoch)*Progress());

         timer.DrawProgressBar( Int_t(progress), convText );
      }
      else {
         timer.DrawProgressBar( i, convText );
      }
   }
}
void TMVA::MethodMLP::TrainOneEpoch()
{
   Int_t nEvents = Data()->GetNEvents();

   // randomize the order in which events are presented (important for sequential mode)
   Int_t* index = new Int_t[nEvents];
   for (Int_t i = 0; i < nEvents; i++) index[i] = i;
   Shuffle(index, nEvents);

   // loop over all training events
   for (Int_t i = 0; i < nEvents; i++) {

      const Event * ev = GetEvent(index[i]);
      if ((ev->GetWeight() < 0) && IgnoreEventsWithNegWeightsInTraining()
          && (Data()->GetCurrentType() == Types::kTraining)){
         continue;
      }

      TrainOneEvent(index[i]);

      // do adjustments if in batch mode
      if (fBPMode == kBatch && (i+1)%fBatchSize == 0) {
         AdjustSynapseWeights();
         if (fgPRINT_BATCH) {
            PrintNetwork();
            WaitForKeyboard();
         }
      }
   }

   delete[] index;
}
void TMVA::MethodMLP::Shuffle(Int_t* index, Int_t n)
{
   Int_t j, k;
   Int_t a = n - 1;
   for (Int_t i = 0; i < n; i++) {
      j = (Int_t) (frgen->Rndm() * a);
      k        = index[j];
      index[j] = index[i];
      index[i] = k;
   }
}
void TMVA::MethodMLP::DecaySynapseWeights(Bool_t lateEpoch)
{
   TSynapse* synapse;
   Int_t numSynapses = fSynapses->GetEntriesFast();
   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
      if (lateEpoch) synapse->DecayLearningRate(TMath::Sqrt(fDecayRate));
      else           synapse->DecayLearningRate(fDecayRate);
   }
}
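// Note: with the default DecayRate=0.01 the "late epoch" argument sqrt(fDecayRate)=0.1 is
// ten times larger, so the learning rate shrinks more aggressively in the last few percent
// of the training (lateEpoch is set near 95% of nEpochs in BackPropagationMinimize). This
// reading assumes TSynapse::DecayLearningRate reduces the rate in proportion to its argument.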
void TMVA::MethodMLP::TrainOneEventFast(Int_t ievt, Float_t*& branchVar, Int_t& type)
{
   Double_t eventWeight = 1.0;

   // get the desired output of this event
   Double_t desired;
   if (type == 0) desired = fOutput->GetMin();  // background
   else           desired = fOutput->GetMax();  // signal

   // force the value for each input neuron
   Double_t x;
   TNeuron* neuron;

   for (UInt_t j = 0; j < GetNvar(); j++) {
      x = branchVar[j];
      if (IsNormalised()) x = gTools().NormVariable( x, GetXmin( j ), GetXmax( j ) );
      neuron = GetInputNeuron(j);
      neuron->ForceValue(x);
   }

   ForceNetworkCalculations();
   UpdateNetwork(desired, eventWeight);
}
void TMVA::MethodMLP::TrainOneEvent(Int_t ievt)
{
   const Event * ev = GetEvent(ievt);
   Double_t eventWeight = ev->GetWeight();
   ForceNetworkInputs( ev );
   ForceNetworkCalculations();
   if      (DoRegression()) UpdateNetwork( ev->GetTargets(),                          eventWeight );
   else if (DoMulticlass()) UpdateNetwork( *DataInfo().GetTargetsForMulticlass( ev ), eventWeight );
   else                     UpdateNetwork( GetDesiredOutput( ev ),                    eventWeight );
}
Double_t TMVA::MethodMLP::GetDesiredOutput( const Event* ev )
{
   return DataInfo().IsSignal(ev) ? fOutput->GetMax() : fOutput->GetMin();
}
void TMVA::MethodMLP::UpdateNetwork(Double_t desired, Double_t eventWeight)
{
   Double_t error = GetOutputNeuron()->GetActivationValue() - desired;
   if      (fEstimator==kMSE) error = GetOutputNeuron()->GetActivationValue() - desired;
   else if (fEstimator==kCE)  error = -1./(GetOutputNeuron()->GetActivationValue() - 1 + desired);
   else    Log() << kFATAL << "Estimator type unspecified!!" << Endl;
   error *= eventWeight;
   GetOutputNeuron()->SetError(error);
   CalculateNeuronDeltas();
   UpdateSynapses();
}
void TMVA::MethodMLP::UpdateNetwork(const std::vector<Float_t>& desired, Double_t eventWeight)
{
   // normalisation for the softmax of the output activations
   Double_t norm = 0.;
   for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
      Double_t act = GetOutputNeuron(i)->GetActivationValue();
      norm += TMath::Exp(act);
   }

   // set the per-output-neuron error (softmax output minus desired value)
   for (UInt_t i = 0, iEnd = desired.size(); i < iEnd; ++i) {
      Double_t act    = GetOutputNeuron(i)->GetActivationValue();
      Double_t output = TMath::Exp(act) / norm;
      Double_t error  = output - desired.at(i);
      error *= eventWeight;
      GetOutputNeuron(i)->SetError(error);
   }

   // back-propagate and update the weights
   CalculateNeuronDeltas();
   UpdateSynapses();
}
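// With a softmax output p_i = exp(a_i)/sum_k exp(a_k) and a cross-entropy loss
// -sum_i desired_i * log(p_i), the derivative with respect to the output activation a_i
// reduces to (p_i - desired_i), which is exactly the per-neuron error set above before
// back-propagating and updating the synapses.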
void TMVA::MethodMLP::CalculateNeuronDeltas()
{
   TNeuron* neuron;
   Int_t numNeurons;
   Int_t numLayers = fNetwork->GetEntriesFast();
   TObjArray* curLayer;

   // step backwards through the network (back-propagation);
   // deltas are calculated starting at the output layer
   for (Int_t i = numLayers-1; i >= 0; i--) {
      curLayer   = (TObjArray*)fNetwork->At(i);
      numNeurons = curLayer->GetEntriesFast();

      for (Int_t j = 0; j < numNeurons; j++) {
         neuron = (TNeuron*) curLayer->At(j);
         neuron->CalculateDelta();
      }
   }
}
void TMVA::MethodMLP::GeneticMinimize()
{
   PrintMessage("Minimizing Estimator with GA");

   // define GA parameters
   fGA_SC_factor = 0.95;

   // ranges
   std::vector<Interval*> ranges;

   Int_t numWeights = fSynapses->GetEntriesFast();
   for (Int_t ivar=0; ivar< numWeights; ivar++) {
      ranges.push_back( new Interval( 0, GetXmax(ivar) - GetXmin(ivar) ));
   }

   FitterBase *gf = new GeneticFitter( *this, Log().GetPrintedSource(), ranges, GetOptions() );
   gf->Run();

   Double_t estimator = CalculateEstimator();
   Log() << kINFO << "GA: estimator after optimization: " << estimator << Endl;
}
Double_t TMVA::MethodMLP::EstimatorFunction( std::vector<Double_t>& parameters)
{
   return ComputeEstimator( parameters );
}
Double_t TMVA::MethodMLP::ComputeEstimator( std::vector<Double_t>& parameters)
{
   TSynapse* synapse;
   Int_t numSynapses = fSynapses->GetEntriesFast();

   for (Int_t i = 0; i < numSynapses; i++) {
      synapse = (TSynapse*)fSynapses->At(i);
      synapse->SetWeight(parameters.at(i));
   }
   if (fUseRegulator) UpdatePriors();

   Double_t estimator = CalculateEstimator();

   return estimator;
}
void TMVA::MethodMLP::UpdateSynapses()
{
   TNeuron* neuron;
   Int_t numNeurons;
   TObjArray* curLayer;
   Int_t numLayers = fNetwork->GetEntriesFast();

   for (Int_t i = 0; i < numLayers; i++) {
      curLayer   = (TObjArray*)fNetwork->At(i);
      numNeurons = curLayer->GetEntriesFast();

      for (Int_t j = 0; j < numNeurons; j++) {
         neuron = (TNeuron*) curLayer->At(j);
         if (fBPMode == kBatch) neuron->UpdateSynapsesBatch();
         else                   neuron->UpdateSynapsesSequential();
      }
   }
}
void TMVA::MethodMLP::AdjustSynapseWeights()
{
   TNeuron* neuron;
   Int_t numNeurons;
   TObjArray* curLayer;
   Int_t numLayers = fNetwork->GetEntriesFast();

   for (Int_t i = numLayers-1; i >= 0; i--) {
      curLayer   = (TObjArray*)fNetwork->At(i);
      numNeurons = curLayer->GetEntriesFast();

      for (Int_t j = 0; j < numNeurons; j++) {
         neuron = (TNeuron*) curLayer->At(j);
         neuron->AdjustSynapseWeights();
      }
   }
}
void TMVA::MethodMLP::UpdatePriors()
{
   fPrior = 0;
   fPriorDev.clear();
   Int_t nSynapses = fSynapses->GetEntriesFast();
   for (Int_t i=0;i<nSynapses;i++) {
      TSynapse* synapse = (TSynapse*)fSynapses->At(i);
      fPrior += 0.5*fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight())*(synapse->GetWeight());
      fPriorDev.push_back(fRegulators[fRegulatorIdx[i]]*(synapse->GetWeight()));
   }
}
void TMVA::MethodMLP::UpdateRegulators()
{
   // update the (approximate) Hessian matrix
   TMatrixD InvH(0,0);
   GetApproxInvHessian(InvH);
   Int_t numSynapses   = fSynapses->GetEntriesFast();
   Int_t numRegulators = fRegulators.size();
   Float_t gamma    = 0,
           variance = 1.;    // Gaussian noise
   std::vector<Int_t>    nWDP(numRegulators);
   std::vector<Double_t> trace(numRegulators), weightSum(numRegulators);
   for (int i=0;i<numSynapses;i++) {
      TSynapse* synapses = (TSynapse*)fSynapses->At(i);
      Int_t idx = fRegulatorIdx[i];
      nWDP[idx]++;
      trace[idx]     += InvH[i][i];
      gamma          += 1-fRegulators[idx]*InvH[i][i];
      weightSum[idx] += (synapses->GetWeight())*(synapses->GetWeight());
   }
   if (fEstimator==kMSE) {
      if (GetNEvents()>gamma) variance = CalculateEstimator( Types::kTraining, 0 )/(1-(gamma/GetNEvents()));
      else                    variance = CalculateEstimator( Types::kTraining, 0 );
   }

   for (int i=0;i<numRegulators;i++)
   {
      fRegulators[i] = variance*nWDP[i]/(weightSum[i]+variance*trace[i]);
      if (fRegulators[i]<0) fRegulators[i] = 0;
      Log()<<kDEBUG<<"R"<<i<<":"<<fRegulators[i]<<"\t";
   }
   float trainE = CalculateEstimator( Types::kTraining, 0 ) ; // estimator for training sample
   float testE  = CalculateEstimator( Types::kTesting,  0 ) ; // estimator for test sample

   Log()<<kDEBUG<<"\n"<<"trainE:"<<trainE<<"\ttestE:"<<testE<<"\tvariance:"<<variance<<"\tgamma:"<<gamma<<Endl;
}
void TMVA::MethodMLP::GetApproxInvHessian(TMatrixD& InvHessian, bool regulate)
{
   Int_t numSynapses = fSynapses->GetEntriesFast();
   InvHessian.ResizeTo( numSynapses, numSynapses );
   InvHessian = 0;
   TMatrixD sens (numSynapses,1);
   TMatrixD sensT(1,numSynapses);
   Int_t nEvents = GetNEvents();
   for (Int_t i=0;i<nEvents;i++) {
      GetEvent(i);
      double outputValue = GetMvaValue(); // force calculation
      GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
      CalculateNeuronDeltas();
      for (Int_t j = 0; j < numSynapses; j++){
         TSynapse* synapses = (TSynapse*)fSynapses->At(j);
         synapses->InitDelta();
         synapses->CalculateDelta();
         sens[j][0] = sensT[0][j] = synapses->GetDelta();
      }
      if      (fEstimator==kMSE) InvHessian += sens*sensT;
      else if (fEstimator==kCE)  InvHessian += (outputValue*(1-outputValue))*sens*sensT;
   }

   if (regulate) {
      for (Int_t i = 0; i < numSynapses; i++){
         InvHessian[i][i] += fRegulators[fRegulatorIdx[i]];
      }
   }
   else {
      for (Int_t i = 0; i < numSynapses; i++){
         InvHessian[i][i] += 1e-6; // to avoid precision problems that may result in infinity
      }
   }

   InvHessian.Invert();
}
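// The matrix accumulated above is the outer-product (Gauss-Newton / Fisher-like)
// approximation  H ~ sum_events s s^T  with s_j = d(output)/d(w_j)  (weighted by
// p(1-p) for the cross-entropy estimator). The diagonal addition of the regulators,
// or of 1e-6 when regulate==false, keeps the matrix well conditioned before it is
// inverted in place by Invert().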
Double_t TMVA::MethodMLP::GetMvaValue( Double_t* errLower, Double_t* errUpper )
{
   Double_t MvaValue = MethodANNBase::GetMvaValue();

   // no Hessian available or no error requested
   if (!fCalculateErrors || errLower==0 || errUpper==0)
      return MvaValue;

   Double_t MvaUpper, MvaLower, median, variance;
   Int_t numSynapses = fSynapses->GetEntriesFast();
   if (fInvHessian.GetNcols()!=numSynapses) {
      Log() << kWARNING << "inconsistent dimension " << fInvHessian.GetNcols() << " vs " << numSynapses << Endl;
   }
   TMatrixD sens (numSynapses,1);
   TMatrixD sensT(1,numSynapses);
   GetOutputNeuron()->SetError(1./fOutput->EvalDerivative(GetOutputNeuron()->GetValue()));
   CalculateNeuronDeltas();
   for (Int_t i = 0; i < numSynapses; i++){
      TSynapse* synapses = (TSynapse*)fSynapses->At(i);
      synapses->InitDelta();
      synapses->CalculateDelta();
      sensT[0][i] = synapses->GetDelta();
   }
   sens.Transpose(sensT);
   TMatrixD sig = sensT*fInvHessian*sens;
   variance = sig[0][0];
   median   = GetOutputNeuron()->GetValue();

   if (variance<0) {
      Log()<<kWARNING<<"Negative variance!!! median=" << median << "\tvariance(sigma^2)=" << variance <<Endl;
      variance = 0;
   }
   variance = sqrt(variance);

   // upper
   MvaUpper = fOutput->Eval(median+variance);
   if (errUpper)
      *errUpper = MvaUpper-MvaValue;

   // lower
   MvaLower = fOutput->Eval(median-variance);
   if (errLower)
      *errLower = MvaValue-MvaLower;

   return MvaValue;
}
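// Error estimate by linear error propagation through the trained network:
//    sigma^2 = s^T * H^{-1} * s ,   with s_j = d(output)/d(w_j)
// using the approximate inverse Hessian computed at the end of training. The +/- 1 sigma
// band on the raw output-neuron value (median) is then mapped through the output
// activation via fOutput->Eval() to obtain the asymmetric errUpper/errLower.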
#ifdef MethodMLP_UseMinuit__

void TMVA::MethodMLP::MinuitMinimize()
{
   fNumberOfWeights = fSynapses->GetEntriesFast();

   TFitter* tfitter = new TFitter( fNumberOfWeights );

   // minuit-specific settings
   Double_t args[10];
   tfitter->ExecuteCommand( "SET PRINTOUT", args, 1 );
   tfitter->ExecuteCommand( "SET NOWARNINGS", args, 0 );

   // init parameters
   for (Int_t ipar=0; ipar < fNumberOfWeights; ipar++) {
      TString parName = Form("w%i", ipar);
      tfitter->SetParameter( ipar,
                             parName, w[ipar], 0.1, 0, 0 );
   }

   // define the FCN function
   tfitter->SetFCN( &IFCN );

   // define fit strategy
   tfitter->ExecuteCommand( "SET STRATEGY", args, 1 );

   // now do the fit !
   tfitter->ExecuteCommand( "MIGRAD", args, 1 );

   Bool_t doBetter     = kFALSE;
   Bool_t doEvenBetter = kFALSE;
   if (doBetter) {
      tfitter->ExecuteCommand( "IMPROVE", args, 1 );

      if (doEvenBetter) {
         tfitter->ExecuteCommand( "MINOS", args, 1 );
      }
   }
}
void TMVA::MethodMLP::IFCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
{
   ((MethodMLP*)GetThisPtr())->FCN( npars, grad, f, fitPars, iflag );
}

TTHREAD_TLS(Int_t)  nc   = 0;
TTHREAD_TLS(double) minf = 1000000;
void TMVA::MethodMLP::FCN( Int_t& npars, Double_t* grad, Double_t &f, Double_t* fitPars, Int_t iflag )
{
   // first update the weights
   for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) {
      TSynapse* synapse = (TSynapse*)fSynapses->At(ipar);
      synapse->SetWeight(fitPars[ipar]);
   }

   // now compute the estimator
   f = CalculateEstimator();

   nc++;
   if (f < minf) minf = f;
   for (Int_t ipar=0; ipar<fNumberOfWeights; ipar++) Log() << kDEBUG << fitPars[ipar] << " ";
   Log() << kDEBUG << Endl;
   Log() << kDEBUG << "***** New estimator: " << f << " min: " << minf << " --> ncalls: " << nc << Endl;
}
TMVA::MethodMLP* TMVA::MethodMLP::GetThisPtr()
{
   return fgThis;
}

#endif
void TMVA::MethodMLP::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   MethodANNBase::MakeClassSpecific(fout, className);
}
void TMVA::MethodMLP::GetHelpMessage() const
{
   TString col    = gConfig().WriteOptionsReference() ? TString() : gTools().Color("bold");
   TString colres = gConfig().WriteOptionsReference() ? TString() : gTools().Color("reset");

   Log() << col << "--- Short description:" << colres << Endl;
   Log() << "The MLP artificial neural network (ANN) is a traditional feed-" << Endl;
   Log() << "forward multilayer perceptron implementation. The MLP has a user-" << Endl;
   Log() << "defined hidden layer architecture, while the number of input (output)" << Endl;
   Log() << "nodes is determined by the input variables (output classes, i.e., " << Endl;
   Log() << "signal and one background). " << Endl;

   Log() << col << "--- Performance optimisation:" << colres << Endl;
   Log() << "Neural networks are stable and performing for a large variety of " << Endl;
   Log() << "linear and non-linear classification problems. However, in contrast" << Endl;
   Log() << "to (e.g.) boosted decision trees, the user is advised to reduce the " << Endl;
   Log() << "number of input variables that have only little discrimination power. " << Endl;
   Log() << "" << Endl;
   Log() << "In the tests we have carried out so far, the MLP and ROOT networks" << Endl;
   Log() << "(TMlpANN, interfaced via TMVA) performed equally well, with however" << Endl;
   Log() << "a clear speed advantage for the MLP. The Clermont-Ferrand neural " << Endl;
   Log() << "net (CFMlpANN) exhibited worse classification performance in these" << Endl;
   Log() << "tests, which is partly due to the slow convergence of its training" << Endl;
   Log() << "(at least 10k training cycles are required to achieve approximately" << Endl;
   Log() << "competitive results)." << Endl;

   Log() << col << "Overtraining: " << colres
         << "only the TMlpANN performs an explicit separation of the" << Endl;
   Log() << "full training sample into independent training and validation samples." << Endl;
   Log() << "We have found that in most high-energy physics applications the " << Endl;
   Log() << "available degrees of freedom (training events) are sufficient to " << Endl;
   Log() << "constrain the weights of the relatively simple architectures required" << Endl;
   Log() << "to achieve good performance. Hence no overtraining should occur, and " << Endl;
   Log() << "the use of validation samples would only reduce the available training" << Endl;
   Log() << "information. However, if the performance on the training sample is " << Endl;
   Log() << "found to be significantly better than the one found with the inde-" << Endl;
   Log() << "pendent test sample, caution is needed. The results for these samples " << Endl;
   Log() << "are printed to standard output at the end of each training job." << Endl;

   Log() << col << "--- Performance tuning via configuration options:" << colres << Endl;
   Log() << "The hidden layer architecture for all ANNs is defined by the option" << Endl;
   Log() << "\"HiddenLayers=N+1,N,...\", where here the first hidden layer has N+1" << Endl;
   Log() << "neurons and the second N neurons (and so on), and where N is the number " << Endl;
   Log() << "of input variables. Excessive numbers of hidden layers should be avoided," << Endl;
   Log() << "in favour of more neurons in the first hidden layer." << Endl;
   Log() << "" << Endl;
   Log() << "The number of cycles should be above 500. As said, if the number of" << Endl;
   Log() << "adjustable weights is small compared to the training sample size," << Endl;
   Log() << "using a large number of training samples should not lead to overtraining." << Endl;
}