REGISTER_METHOD(Boost)

ClassImp(TMVA::MethodBoost);
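////////////////////////////////////////////////////////////////////////////////
/// Constructor.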
TMVA::MethodBoost::MethodBoost( const TString& jobName,
                                const TString& methodTitle,
                                DataSetInfo& theData,
                                const TString& theOption ) :
   TMVA::MethodCompositeBase( jobName, Types::kBoost, methodTitle, theData, theOption )
   , fDetailedMonitoring(kFALSE)
   , fBaggedSampleFraction(0)
   , fBoostedMethodTitle(methodTitle)
   , fBoostedMethodOptions(theOption)
   , fMonitorBoostedMethod(kFALSE)
   , fOverlap_integral(0.0)
{
   fMVAvalues = new std::vector<Float_t>;
   fDataSetManager = NULL;
   fHistoricBoolOption = kFALSE;
}
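////////////////////////////////////////////////////////////////////////////////
/// Constructor for constructing the method from a weight file.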
TMVA::MethodBoost::MethodBoost( DataSetInfo& dsi,
                                const TString& theWeightFile)
   : TMVA::MethodCompositeBase( Types::kBoost, dsi, theWeightFile)
   , fDetailedMonitoring(kFALSE)
   , fBaggedSampleFraction(0)
   , fBoostedMethodTitle("")
   , fBoostedMethodOptions("")
   , fMonitorBoostedMethod(kFALSE)
   , fOverlap_integral(0.0)
{
   fMVAvalues = new std::vector<Float_t>;
   fDataSetManager = NULL;
   fHistoricBoolOption = kFALSE;
}
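////////////////////////////////////////////////////////////////////////////////
/// Destructor.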
TMVA::MethodBoost::~MethodBoost( void )
{
   fMethodWeight.clear();

   fTrainSigMVAHist.clear();
   fTrainBgdMVAHist.clear();
   fBTrainSigMVAHist.clear();
   fBTrainBgdMVAHist.clear();
   fTestSigMVAHist.clear();
   fTestBgdMVAHist.clear();
}
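////////////////////////////////////////////////////////////////////////////////
/// Boost can handle classification with two classes.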
Bool_t TMVA::MethodBoost::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t /*numberTargets*/ )
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   return kFALSE;
}
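////////////////////////////////////////////////////////////////////////////////
/// Declare the boost-specific options: number of boosts, boosting type,
/// AdaBoost beta, bagged sample fraction, output transform and monitoring flags.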
void TMVA::MethodBoost::DeclareOptions()
{
   DeclareOptionRef( fBoostNum = 1, "Boost_Num",
                     "Number of times the classifier is boosted" );

   DeclareOptionRef( fMonitorBoostedMethod = kTRUE, "Boost_MonitorMethod",
                     "Write monitoring histograms for each boosted classifier" );

   DeclareOptionRef( fDetailedMonitoring = kFALSE, "Boost_DetailedMonitoring",
                     "Produce histograms for detailed boost monitoring" );

   DeclareOptionRef( fBoostType = "AdaBoost", "Boost_Type", "Boosting type for the classifiers" );
   AddPreDefVal(TString("RealAdaBoost"));
   AddPreDefVal(TString("AdaBoost"));
   AddPreDefVal(TString("Bagging"));

   DeclareOptionRef( fBaggedSampleFraction = 0.6, "Boost_BaggedSampleFraction",
                     "Relative size of bagged event sample to original size of the data sample (used whenever bagging is used)" );

   DeclareOptionRef( fAdaBoostBeta = 1.0, "Boost_AdaBoostBeta",
                     "The AdaBoost parameter that sets the effect of every boost step on the events' weights" );

   DeclareOptionRef( fTransformString = "step", "Boost_Transform",
                     "Type of transform applied to every boosted method's output: step, linear, log, gauss" );
   AddPreDefVal(TString("step"));
   AddPreDefVal(TString("linear"));
   AddPreDefVal(TString("log"));
   AddPreDefVal(TString("gauss"));

   DeclareOptionRef( fRandomSeed = 0, "Boost_RandomSeed",
                     "Seed for random number generator used for bagging" );

   TMVA::MethodCompositeBase::fMethods.reserve(fBoostNum);
}
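////////////////////////////////////////////////////////////////////////////////
/// Options that are used ONLY for the READER to ensure backward compatibility.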
void TMVA::MethodBoost::DeclareCompatibilityOptions()
{
   MethodBase::DeclareCompatibilityOptions();

   DeclareOptionRef( fHistoricOption = "ByError", "Boost_MethodWeightType",
                     "How to set the final weight of the boosted classifiers" );
   AddPreDefVal(TString("ByError"));
   AddPreDefVal(TString("Average"));
   AddPreDefVal(TString("ByROC"));
   AddPreDefVal(TString("ByOverlap"));
   AddPreDefVal(TString("LastMethod"));

   DeclareOptionRef( fHistoricOption = "step", "Boost_Transform",
                     "Type of transform applied to every boosted method's output: step, linear, log, gauss" );
   AddPreDefVal(TString("step"));
   AddPreDefVal(TString("linear"));
   AddPreDefVal(TString("log"));
   AddPreDefVal(TString("gauss"));

   AddPreDefVal(TString("HighEdgeGauss"));
   AddPreDefVal(TString("HighEdgeCoPara"));

   DeclareOptionRef( fHistoricBoolOption, "Boost_RecalculateMVACut",
                     "Recalculate the classifier MVA signal-like cut at every boost iteration" );
}
Bool_t TMVA::MethodBoost::BookMethod( Types::EMVA theMethod, TString methodTitle, TString theOption )
{
   fBoostedMethodName    = Types::Instance().GetMethodName( theMethod );
   fBoostedMethodTitle   = methodTitle;
   fBoostedMethodOptions = theOption;
   TString opts = theOption;

   return kTRUE;
}
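////////////////////////////////////////////////////////////////////////////////
/// Common initialisation with defaults for the boost method.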
void TMVA::MethodBoost::Init()
{
}
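////////////////////////////////////////////////////////////////////////////////
/// Initialisation routine: booking the monitoring histograms and the
/// monitoring tree that are filled during the boosting process.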
void TMVA::MethodBoost::InitHistos()
{
   Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());

   results->Store(new TH1F("MethodWeight","Normalized Classifier Weight",fBoostNum,0,fBoostNum),"ClassifierWeight");
   results->Store(new TH1F("BoostWeight","Boost Weight",fBoostNum,0,fBoostNum),"BoostWeight");
   results->Store(new TH1F("ErrFraction","Error Fraction (by boosted event weights)",fBoostNum,0,fBoostNum),"ErrorFraction");
   if (fDetailedMonitoring){
      results->Store(new TH1F("ROCIntegral_test","ROC integral of single classifier (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegral_test");
      results->Store(new TH1F("ROCIntegralBoosted_test","ROC integral of boosted method (testing sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_test");
      results->Store(new TH1F("ROCIntegral_train","ROC integral of single classifier (training sample)",fBoostNum,0,fBoostNum),"ROCIntegral_train");
      results->Store(new TH1F("ROCIntegralBoosted_train","ROC integral of boosted method (training sample)",fBoostNum,0,fBoostNum),"ROCIntegralBoosted_train");
      results->Store(new TH1F("OverlapIntegral_train","Overlap integral (training sample)",fBoostNum,0,fBoostNum),"Overlap");
   }
   results->GetHist("ClassifierWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
   results->GetHist("ClassifierWeight")->GetYaxis()->SetTitle("Classifier Weight");
   results->GetHist("BoostWeight")->GetXaxis()->SetTitle("Index of boosted classifier");
   results->GetHist("BoostWeight")->GetYaxis()->SetTitle("Boost Weight");
   results->GetHist("ErrorFraction")->GetXaxis()->SetTitle("Index of boosted classifier");
   results->GetHist("ErrorFraction")->GetYaxis()->SetTitle("Error Fraction");
   if (fDetailedMonitoring){
      results->GetHist("ROCIntegral_test")->GetXaxis()->SetTitle("Index of boosted classifier");
      results->GetHist("ROCIntegral_test")->GetYaxis()->SetTitle("ROC integral of single classifier");
      results->GetHist("ROCIntegralBoosted_test")->GetXaxis()->SetTitle("Number of boosts");
      results->GetHist("ROCIntegralBoosted_test")->GetYaxis()->SetTitle("ROC integral boosted");
      results->GetHist("ROCIntegral_train")->GetXaxis()->SetTitle("Index of boosted classifier");
      results->GetHist("ROCIntegral_train")->GetYaxis()->SetTitle("ROC integral of single classifier");
      results->GetHist("ROCIntegralBoosted_train")->GetXaxis()->SetTitle("Number of boosts");
      results->GetHist("ROCIntegralBoosted_train")->GetYaxis()->SetTitle("ROC integral boosted");
      results->GetHist("Overlap")->GetXaxis()->SetTitle("Index of boosted classifier");
      results->GetHist("Overlap")->GetYaxis()->SetTitle("Overlap integral");
   }

   results->Store(new TH1F("SoverBtotal","S/B in reweighted training sample",fBoostNum,0,fBoostNum),"SoverBtotal");
   results->GetHist("SoverBtotal")->GetYaxis()->SetTitle("S/B (boosted sample)");
   results->GetHist("SoverBtotal")->GetXaxis()->SetTitle("Index of boosted classifier");

   results->Store(new TH1F("SeparationGain","SeparationGain",fBoostNum,0,fBoostNum),"SeparationGain");
   results->GetHist("SeparationGain")->GetYaxis()->SetTitle("SeparationGain");
   results->GetHist("SeparationGain")->GetXaxis()->SetTitle("Index of boosted classifier");

   fMonitorTree = new TTree("MonitorBoost","Boost variables");
   fMonitorTree->Branch("iMethod",&fCurrentMethodIdx,"iMethod/I");
   fMonitorTree->Branch("boostWeight",&fBoostWeight,"boostWeight/D");
   fMonitorTree->Branch("errorFraction",&fMethodError,"errorFraction/D");
   fMonitorBoostedMethod = kTRUE;
}
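////////////////////////////////////////////////////////////////////////////////
/// Check the setup and print the configuration state at debug level.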
void TMVA::MethodBoost::CheckSetup()
{
   Log() << kDEBUG << "CheckSetup: fBoostType=" << fBoostType << Endl;
   Log() << kDEBUG << "CheckSetup: fAdaBoostBeta=" << fAdaBoostBeta << Endl;
   Log() << kDEBUG << "CheckSetup: fBoostWeight=" << fBoostWeight << Endl;
   Log() << kDEBUG << "CheckSetup: fMethodError=" << fMethodError << Endl;
   Log() << kDEBUG << "CheckSetup: fBoostNum=" << fBoostNum << Endl;
   Log() << kDEBUG << "CheckSetup: fRandomSeed=" << fRandomSeed << Endl;
   Log() << kDEBUG << "CheckSetup: fTrainSigMVAHist.size()=" << fTrainSigMVAHist.size() << Endl;
   Log() << kDEBUG << "CheckSetup: fTestSigMVAHist.size()=" << fTestSigMVAHist.size() << Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorBoostedMethod=" << (fMonitorBoostedMethod ? "true" : "false") << Endl;
   Log() << kDEBUG << "CheckSetup: MName=" << fBoostedMethodName << " Title=" << fBoostedMethodTitle << Endl;
   Log() << kDEBUG << "CheckSetup: MOptions=" << fBoostedMethodOptions << Endl;
   Log() << kDEBUG << "CheckSetup: fMonitorTree=" << fMonitorTree << Endl;
   Log() << kDEBUG << "CheckSetup: fCurrentMethodIdx=" << fCurrentMethodIdx << Endl;
   if (fMethods.size()>0) Log() << kDEBUG << "CheckSetup: fMethods[0]=" << fMethods[0] << Endl;
   Log() << kDEBUG << "CheckSetup: fMethodWeight.size()=" << fMethodWeight.size() << Endl;
   if (fMethodWeight.size()>0) Log() << kDEBUG << "CheckSetup: fMethodWeight[0]=" << fMethodWeight[0] << Endl;
   Log() << kDEBUG << "CheckSetup: trying to repair things" << Endl;
}
void TMVA::MethodBoost::Train()
{
   TDirectory* methodDir( 0 );
   TString     dirName, dirTitle;
   Int_t       StopCounter = 0;
   Results*    results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());

   if (Data()->GetNTrainingEvents()==0) Log() << kFATAL << "<Train> Data() has zero events" << Endl;
   Data()->SetCurrentType(Types::kTraining);

   if (fMethods.size() > 0) fMethods.clear();
   fMVAvalues->resize(Data()->GetNTrainingEvents(), 0.0);

   Log() << kINFO << "Training " << fBoostNum << " " << fBoostedMethodName
         << " with title " << fBoostedMethodTitle << " Classifiers ... patience please" << Endl;
   Timer timer( fBoostNum, GetName() );
   Ssiz_t varTrafoStart = fBoostedMethodOptions.Index("~VarTransform=");
   if (varTrafoStart > 0) {
      Ssiz_t varTrafoEnd = fBoostedMethodOptions.Index(":", varTrafoStart);
      if (varTrafoEnd < varTrafoStart)
         varTrafoEnd = fBoostedMethodOptions.Length();
      fBoostedMethodOptions.Remove(varTrafoStart, varTrafoEnd - varTrafoStart);
   }
   // training and boosting the classifiers
   for (fCurrentMethodIdx=0; fCurrentMethodIdx<fBoostNum; fCurrentMethodIdx++) {
      // the first classifier shows the option string output, the rest do not
      if (fCurrentMethodIdx>0) TMVA::MsgLogger::InhibitOutput();

      IMethod* method = ClassifierFactory::Instance().Create(
         fBoostedMethodName.Data(), GetJobName(),
         Form("%s_B%04i", fBoostedMethodTitle.Data(), fCurrentMethodIdx),
         DataInfo(), fBoostedMethodOptions);
      TMVA::MsgLogger::EnableOutput();

      fCurrentMethod = dynamic_cast<MethodBase*>(method);

      if (fCurrentMethod==0) {
         Log() << kFATAL << "Booking of the " << fCurrentMethodIdx
               << "-th boosted classifier failed" << Endl;
         return;
      }

      // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects)
      if (fCurrentMethod->GetMethodType() == Types::kCategory) {
         MethodCategory* methCat = dynamic_cast<MethodCategory*>(fCurrentMethod);
         if (!methCat)
            Log() << kFATAL << "Method with type kCategory cannot be cast to MethodCategory. /MethodBoost" << Endl;
         else
            methCat->fDataSetManager = fDataSetManager;
      }

      fCurrentMethod->SetMsgType(kWARNING);
      fCurrentMethod->SetupMethod();
      fCurrentMethod->ParseOptions();
      // put SetAnalysisType here for the needs of MLP
      fCurrentMethod->SetAnalysisType( GetAnalysisType() );
      fCurrentMethod->ProcessSetup();
      fCurrentMethod->CheckSetup();

      // reroute the transformation handler to the boost method
      fCurrentMethod->RerouteTransformationHandler( &(this->GetTransformationHandler()) );
      // creating the directory of the classifier
      if (fMonitorBoostedMethod) {
         methodDir = GetFile()->GetDirectory(dirName = Form("%s_B%04i", fBoostedMethodName.Data(), fCurrentMethodIdx));
         if (!methodDir)
            methodDir = BaseDir()->mkdir(dirName, dirTitle = Form("Directory Boosted %s #%04i", fBoostedMethodName.Data(), fCurrentMethodIdx));
         fCurrentMethod->SetMethodDir(methodDir);
         fCurrentMethod->BaseDir()->cd();
      }

      // training
      TMVA::MethodCompositeBase::fMethods.push_back(method);
      timer.DrawProgressBar( fCurrentMethodIdx );
      if (fCurrentMethodIdx==0) MonitorBoost(Types::kBoostProcBegin, fCurrentMethodIdx);
      MonitorBoost(Types::kBeforeTraining, fCurrentMethodIdx);
      TMVA::MsgLogger::InhibitOutput(); // suppress the output of the single methods
      if (fBoostType=="Bagging") Bagging(); // the first classifier is also trained on a bagged sample
      SingleTrain();
      TMVA::MsgLogger::EnableOutput();
      if (!IsSilentFile()) fCurrentMethod->WriteMonitoringHistosToFile();

      // calculate MVA values of the current method for all events in the
      // training sample (used later to identify misclassified events etc.)
      CalcMVAValues();

      if (!IsSilentFile() && fCurrentMethodIdx==0 && fMonitorBoostedMethod) CreateMVAHistorgrams();
      // boosting (reweight the training sample)
      MonitorBoost(Types::kBeforeBoosting, fCurrentMethodIdx);
      SingleBoost(fCurrentMethod);

      MonitorBoost(Types::kAfterBoosting, fCurrentMethodIdx);
      results->GetHist("BoostWeight")->SetBinContent(fCurrentMethodIdx+1, fBoostWeight);
      results->GetHist("ErrorFraction")->SetBinContent(fCurrentMethodIdx+1, fMethodError);

      if (fDetailedMonitoring) {
         fROC_training = GetBoostROCIntegral(kTRUE, Types::kTraining, kTRUE);
         results->GetHist("ROCIntegral_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kTRUE, Types::kTesting));
         results->GetHist("ROCIntegralBoosted_test")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTesting));
         results->GetHist("ROCIntegral_train")->SetBinContent(fCurrentMethodIdx+1, fROC_training);
         results->GetHist("ROCIntegralBoosted_train")->SetBinContent(fCurrentMethodIdx+1, GetBoostROCIntegral(kFALSE, Types::kTraining));
         results->GetHist("Overlap")->SetBinContent(fCurrentMethodIdx+1, fOverlap_integral);
      }

      fMonitorTree->Fill();
      // stop boosting if the error rate has reached 0.5
      Log() << kDEBUG << "AdaBoost (methodErr) err = " << fMethodError << Endl;
      if (fMethodError > 0.49999) StopCounter++;
      if (StopCounter > 0 && fBoostType != "Bagging") {
         timer.DrawProgressBar( fBoostNum );
         fBoostNum = fCurrentMethodIdx+1;
         Log() << kINFO << "Error rate has reached 0.5 (" << fMethodError
               << "), boosting process stopped at #" << fBoostNum << " classifier" << Endl;
         if (fBoostNum < 5)
            Log() << kINFO << "The classifier might be too strong to boost with Beta = "
                  << fAdaBoostBeta << ", try reducing it." << Endl;
         break;
      }
   }
   // as MethodBoost acts on a shared event sample (unlike MethodBDT), the boosted
   // event weights must not be left behind for the next classifier in the factory
   ResetBoostWeights();

   Timer* timer1 = new Timer( fBoostNum, GetName() );
   // normalizing the weights of the classifiers
   for (fCurrentMethodIdx=0; fCurrentMethodIdx<fBoostNum; fCurrentMethodIdx++) {
      // performing post-boosting actions

      timer1->DrawProgressBar( fCurrentMethodIdx );

      if (fCurrentMethodIdx==fBoostNum) {
         Log() << kINFO << "Elapsed time: " << timer1->GetElapsedTime() << Endl;
      }

      TH1F* tmp = dynamic_cast<TH1F*>( results->GetHist("ClassifierWeight") );
      if (tmp) tmp->SetBinContent(fCurrentMethodIdx+1, fMethodWeight[fCurrentMethodIdx]);
   }

   // ensure that in case of only one boost the method weight equals 1.0;
   // this avoids a division by zero for x=0 in the expression (1-x)/x
   if (fMethods.size()==1) fMethodWeight[0] = 1.0;

   MonitorBoost(Types::kBoostProcEnd);
}
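////////////////////////////////////////////////////////////////////////////////
/// Copy the current option string into the options for the boosted method.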
void TMVA::MethodBoost::CleanBoostOptions()
{
   fBoostedMethodOptions = GetOptions();
}
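////////////////////////////////////////////////////////////////////////////////
/// Create the MVA monitoring histograms (training, boosted-training and test
/// samples, for signal and background) for each boosted classifier.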
void TMVA::MethodBoost::CreateMVAHistorgrams()
{
   if (fBoostNum <= 0) Log() << kFATAL << "CreateHistograms called before fBoostNum is initialized" << Endl;

   // calculating histogram boundaries; nrms = number of rms around the average
   // to use for the range (of the 0th classifier)
   Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
   Int_t signalClass = 0;
   if (DataInfo().GetClassInfo("Signal") != 0) {
      signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
   }
   gTools().ComputeStat( GetEventCollection( Types::kMaxTreeType ), fMVAvalues,
                         meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );

   fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
   xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
   xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.00001;

   // creating all the histograms
   for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {
      fTrainSigMVAHist .push_back( new TH1F( Form("MVA_Train_S_%04i",imtd), "MVA_Train_S",        fNbins, xmin, xmax ) );
      fTrainBgdMVAHist .push_back( new TH1F( Form("MVA_Train_B%04i", imtd), "MVA_Train_B",        fNbins, xmin, xmax ) );
      fBTrainSigMVAHist.push_back( new TH1F( Form("MVA_BTrain_S%04i",imtd), "MVA_BoostedTrain_S", fNbins, xmin, xmax ) );
      fBTrainBgdMVAHist.push_back( new TH1F( Form("MVA_BTrain_B%04i",imtd), "MVA_BoostedTrain_B", fNbins, xmin, xmax ) );
      fTestSigMVAHist  .push_back( new TH1F( Form("MVA_Test_S%04i",  imtd), "MVA_Test_S",         fNbins, xmin, xmax ) );
      fTestBgdMVAHist  .push_back( new TH1F( Form("MVA_Test_B%04i",  imtd), "MVA_Test_B",         fNbins, xmin, xmax ) );
   }
}
void TMVA::MethodBoost::ResetBoostWeights()
{
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = Data()->GetEvent(ievt);
      ev->SetBoostWeight( 1.0 );
   }
}
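////////////////////////////////////////////////////////////////////////////////
/// Write the monitoring histograms and the monitoring tree to the file.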
void TMVA::MethodBoost::WriteMonitoringHistosToFile( void ) const
{
   if (fMonitorBoostedMethod) {
      for (UInt_t imtd=0; imtd<fBoostNum; imtd++) {

         // write the histograms into the specific classifier's directory
         MethodBase* m = dynamic_cast<MethodBase*>(fMethods[imtd]);
         if (!m) continue;
         TDirectory* dir = m->BaseDir();
         dir->cd();
         fTrainSigMVAHist[imtd]->SetDirectory(dir);
         fTrainSigMVAHist[imtd]->Write();
         fTrainBgdMVAHist[imtd]->SetDirectory(dir);
         fTrainBgdMVAHist[imtd]->Write();
         fBTrainSigMVAHist[imtd]->SetDirectory(dir);
         fBTrainSigMVAHist[imtd]->Write();
         fBTrainBgdMVAHist[imtd]->SetDirectory(dir);
         fBTrainBgdMVAHist[imtd]->Write();
      }
   }

   fMonitorTree->Write();
}
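////////////////////////////////////////////////////////////////////////////////
/// Evaluate the boosted classifiers on the testing sample and fill the test
/// MVA histograms of the individual methods.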
void TMVA::MethodBoost::TestClassification()
{
   MethodBase::TestClassification();
   if (fMonitorBoostedMethod) {
      UInt_t nloop = fTestSigMVAHist.size();
      if (fMethods.size()<nloop) nloop = fMethods.size();

      // running over all the events and populating the test MVA histograms
      Data()->SetCurrentType(Types::kTesting);
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = GetEvent(ievt);
         Float_t w = ev->GetWeight();
         if (DataInfo().IsSignal(ev)) {
            for (UInt_t imtd=0; imtd<nloop; imtd++) {
               fTestSigMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
            }
         }
         else {
            for (UInt_t imtd=0; imtd<nloop; imtd++) {
               fTestBgdMVAHist[imtd]->Fill(fMethods[imtd]->GetMvaValue(),w);
            }
         }
      }
      Data()->SetCurrentType(Types::kTraining);
   }
}
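////////////////////////////////////////////////////////////////////////////////
/// Write the evaluation histograms into the target file.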
void TMVA::MethodBoost::WriteEvaluationHistosToFile(Types::ETreeType treetype)
{
   MethodBase::WriteEvaluationHistosToFile(treetype);
   if (treetype==Types::kTraining) return;
   UInt_t nloop = fTestSigMVAHist.size();
   if (fMethods.size()<nloop) nloop = fMethods.size();
   if (fMonitorBoostedMethod) {
      TDirectory* dir = 0;
      for (UInt_t imtd=0; imtd<nloop; imtd++) {
         // retrieve the directory named after the method
         MethodBase* mva = dynamic_cast<MethodBase*>(fMethods[imtd]);
         if (!mva) continue;
         dir = mva->BaseDir();
         if (dir==0) continue;
         dir->cd();
         fTestSigMVAHist[imtd]->SetDirectory(dir);
         fTestSigMVAHist[imtd]->Write();
         fTestBgdMVAHist[imtd]->SetDirectory(dir);
         fTestBgdMVAHist[imtd]->Write();
      }
   }
}
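////////////////////////////////////////////////////////////////////////////////
/// Process user options.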
void TMVA::MethodBoost::ProcessOptions()
{
}
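////////////////////////////////////////////////////////////////////////////////
/// Initialization and training of the last booked classifier.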
void TMVA::MethodBoost::SingleTrain()
{
   Data()->SetCurrentType(Types::kTraining);
   MethodBase* meth = dynamic_cast<MethodBase*>(GetLastMethod());
   if (meth) {
      meth->SetSilentFile(IsSilentFile());
      if (IsModelPersistence()) {
         TString _fFileDir = DataInfo().GetName();
         _fFileDir += "/" + gConfig().GetIONames().fWeightFileDir;
         meth->SetWeightFileDir(_fFileDir);
      }
      meth->SetModelPersistence(IsModelPersistence());
      meth->TrainMethod();
   }
}
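////////////////////////////////////////////////////////////////////////////////
/// Find the CUT on the individual MVA that defines an event as correctly
/// classified or misclassified. The cut is chosen by scanning the cumulative
/// signal and background distributions and maximizing the separation gain,
///
///    gain = index(parent) - ( n_left*index(left) + n_right*index(right) ) / n_total ,
///
/// as also spelled out in the debug printout below; here the separation index
/// of a sample is its Gini index.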
void TMVA::MethodBoost::FindMVACut(MethodBase* method)
{
   if (!method || method->GetMethodType() == Types::kDT) return;

   // create fine histograms of the MVA response
   const Int_t nBins=10001;
   Double_t minMVA=150000;
   Double_t maxMVA=-150000;
   for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
      GetEvent(ievt);
      Double_t val = method->GetMvaValue();
      if (val>maxMVA) maxMVA=val;
      if (val<minMVA) minMVA=val;
   }
   maxMVA = maxMVA + (maxMVA-minMVA)/nBins;

   TH1D *mvaS  = new TH1D(Form("MVAS_%d", fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
   TH1D *mvaB  = new TH1D(Form("MVAB_%d", fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
   TH1D *mvaSC = new TH1D(Form("MVASC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
   TH1D *mvaBC = new TH1D(Form("MVABC_%d",fCurrentMethodIdx),"",nBins,minMVA,maxMVA);
   Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
   if (fDetailedMonitoring){
      results->Store(mvaS,  Form("MVAS_%d", fCurrentMethodIdx));
      results->Store(mvaB,  Form("MVAB_%d", fCurrentMethodIdx));
      results->Store(mvaSC, Form("MVASC_%d",fCurrentMethodIdx));
      results->Store(mvaBC, Form("MVABC_%d",fCurrentMethodIdx));
   }

   for (Long64_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {

      Double_t weight = GetEvent(ievt)->GetWeight();
      Double_t mvaVal = method->GetMvaValue();
      if (DataInfo().IsSignal(GetEvent(ievt))){
         mvaS->Fill(mvaVal,weight);
      }
      else {
         mvaB->Fill(mvaVal,weight);
      }
   }
   SeparationBase* sepGain;
   sepGain = new GiniIndex();

   Double_t sTot = mvaS->GetSum();
   Double_t bTot = mvaB->GetSum();

   mvaSC->SetBinContent(1, mvaS->GetBinContent(1));
   mvaBC->SetBinContent(1, mvaB->GetBinContent(1));
   Double_t sSel = mvaSC->GetBinContent(1);
   Double_t bSel = mvaBC->GetBinContent(1);
   Double_t separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
   Double_t mvaCut  = mvaSC->GetBinLowEdge(1);
   Double_t sSelCut = sSel;
   Double_t bSelCut = bSel;

   // mvaCutOrientation = 1 if mva > mvaCut --> signal, -1 if mva < mvaCut --> signal
   Double_t mvaCutOrientation = 1;
   for (Int_t ibin=1; ibin<=nBins; ibin++){
      mvaSC->SetBinContent(ibin, mvaS->GetBinContent(ibin)+mvaSC->GetBinContent(ibin-1));
      mvaBC->SetBinContent(ibin, mvaB->GetBinContent(ibin)+mvaBC->GetBinContent(ibin-1));

      sSel = mvaSC->GetBinContent(ibin);
      bSel = mvaBC->GetBinContent(ibin);

      if (separationGain < sepGain->GetSeparationGain(sSel,bSel,sTot,bTot)) {
         separationGain = sepGain->GetSeparationGain(sSel,bSel,sTot,bTot);
         mvaCut  = mvaSC->GetBinLowEdge(ibin+1);
         sSelCut = sSel;
         bSelCut = bSel;
         if (sSel*(bTot-bSel) > (sTot-sSel)*bSel) mvaCutOrientation = -1;
         else                                     mvaCutOrientation =  1;
      }
   }
   if (0){ // detailed debug printout of the cut scan (disabled by default)
      double parentIndex = sepGain->GetSeparationIndex(sTot,bTot);
      double leftIndex   = sepGain->GetSeparationIndex(sSelCut,bSelCut);
      double rightIndex  = sepGain->GetSeparationIndex(sTot-sSelCut,bTot-bSelCut);
      std::cout
         << " s2=" << (sTot-sSelCut)
         << " b2=" << (bTot-bSelCut)
         << " s/b(1)=" << sSelCut/bSelCut
         << " s/b(2)=" << (sTot-sSelCut)/(bTot-bSelCut)
         << " index before cut=" << parentIndex
         << " after: left=" << leftIndex
         << " after: right=" << rightIndex
         << " sepGain=" << parentIndex-( (sSelCut+bSelCut)*leftIndex + (sTot-sSelCut+bTot-bSelCut)*rightIndex )/(sTot+bTot)
         << " sepGain=" << separationGain
         << " sepGain=" << sepGain->GetSeparationGain(sSelCut,bSelCut,sTot,bTot)
         << " idx=" << fCurrentMethodIdx
         << " cutOrientation=" << mvaCutOrientation
         << std::endl;
   }

   method->SetSignalReferenceCut(mvaCut);
   method->SetSignalReferenceCutOrientation(mvaCutOrientation);

   results->GetHist("SeparationGain")->SetBinContent(fCurrentMethodIdx+1, separationGain);

   Log() << kDEBUG << "(old step) Setting method cut to " << method->GetSignalReferenceCut() << Endl;
}
Double_t TMVA::MethodBoost::SingleBoost(MethodBase* method)
{
   Double_t returnVal = -1;

   if      (fBoostType=="AdaBoost")     returnVal = this->AdaBoost(method,1);
   else if (fBoostType=="RealAdaBoost") returnVal = this->AdaBoost(method,0);
   else if (fBoostType=="Bagging")      returnVal = this->Bagging();
   else {
      Log() << kFATAL << "<Boost> unknown boost option " << fBoostType << " called" << Endl;
   }
   fMethodWeight.push_back(returnVal);
   return returnVal;
}
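////////////////////////////////////////////////////////////////////////////////
/// The standard (discrete) AdaBoost and the RealAdaBoost algorithm. The
/// weighted error fraction err of the current classifier is turned into a
/// boost weight,
///
///    discrete AdaBoost:  alpha = beta * ln( (1-err)/err )
///    RealAdaBoost:       alpha = beta * ln( (1+err)/(1-err) )
///
/// (beta = Boost_AdaBoostBeta); misclassified events are then scaled up by
/// exp(alpha) before the weights are renormalized to their original sum.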
Double_t TMVA::MethodBoost::AdaBoost(MethodBase* method, Bool_t discreteAdaBoost)
{
   if (!method) {
      Log() << kWARNING << " AdaBoost called without classifier reference - needed for calculating AdaBoost " << Endl;
      return 0;
   }

   Float_t w,v; Bool_t sig=kTRUE;
   Double_t sumAll=0, sumWrong=0;
   Bool_t* WrongDetection = new Bool_t[GetNEvents()];
   QuickMVAProbEstimator* MVAProb = NULL;

   if (discreteAdaBoost) {
      FindMVACut(method);
      Log() << kDEBUG << " individual mva cut value = " << method->GetSignalReferenceCut() << Endl;
   }
   else {
      // RealAdaBoost does not use a simple yes/no answer from the single MVA,
      // but a signal probability estimated from the MVA output
      MVAProb = new TMVA::QuickMVAProbEstimator();
      for (Long64_t evt=0; evt<GetNEvents(); evt++) {
         const Event* ev = Data()->GetEvent(evt);
         MVAProb->AddEvent(fMVAvalues->at(evt), ev->GetWeight(), ev->GetClass());
      }
   }

   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) WrongDetection[ievt]=kTRUE;

   // find the misclassified events and calculate their total weight
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      sig = DataInfo().IsSignal(ev);
      v = fMVAvalues->at(ievt);
      w = ev->GetWeight();
      sumAll += w;
      // fill the monitoring histograms
      if (fMonitorBoostedMethod) {
         if (sig) {
            fBTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,w);
            fTrainSigMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
         }
         else {
            fBTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,w);
            fTrainBgdMVAHist[fCurrentMethodIdx]->Fill(v,ev->GetOriginalWeight());
         }
      }

      if (discreteAdaBoost){
         if (sig == method->IsSignalLike(fMVAvalues->at(ievt))){
            WrongDetection[ievt]=kFALSE;
         }
         else {
            WrongDetection[ievt]=kTRUE;
            sumWrong += w;
         }
      }
      else {
         Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
         mvaProb = 2*(mvaProb-0.5);
         Int_t trueType;
         if (DataInfo().IsSignal(ev)) trueType = 1;
         else trueType = -1;
         sumWrong += w*trueType*mvaProb;
      }
   }

   fMethodError = sumWrong/sumAll;
   Double_t boostWeight = 0;

   if (fMethodError == 0) { // no misclassification made... no boosting
      Log() << kWARNING << "Your classifier worked perfectly on the training sample --> serious overtraining expected and no boosting done " << Endl;
   }
   else {
      if (discreteAdaBoost)
         boostWeight = TMath::Log((1.-fMethodError)/fMethodError)*fAdaBoostBeta;
      else
         boostWeight = TMath::Log((1.+fMethodError)/(1-fMethodError))*fAdaBoostBeta;

      Double_t newSum=0., oldSum=0.;

      Double_t boostfactor = TMath::Exp(boostWeight);

      // reweighting the training events
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = Data()->GetEvent(ievt);
         oldSum += ev->GetWeight();
         if (discreteAdaBoost){
            // events are classified as signal OR background, right or wrong
            if (WrongDetection[ievt] && boostWeight != 0) {
               if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
               else                     ev->ScaleBoostWeight(1./boostfactor);
            }
         }
         else {
            // events are weighted by their probability of being signal or background
            Double_t mvaProb = MVAProb->GetMVAProbAt((Float_t)fMVAvalues->at(ievt));
            mvaProb = 2*(mvaProb-0.5);
            Int_t trueType;
            if (DataInfo().IsSignal(ev)) trueType = 1;
            else trueType = -1;

            boostfactor = TMath::Exp(-1*boostWeight*trueType*mvaProb);
            if (ev->GetWeight() > 0) ev->ScaleBoostWeight(boostfactor);
            else                     ev->ScaleBoostWeight(1./boostfactor);
         }
         newSum += ev->GetWeight();
      }

      Double_t normWeight = oldSum/newSum;
      // next, normalize the weights
      Double_t normSig=0, normBkg=0;
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = Data()->GetEvent(ievt);
         ev->ScaleBoostWeight(normWeight);
         if (ev->GetClass()) normSig += ev->GetWeight();
         else                normBkg += ev->GetWeight();
      }

      Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());
      results->GetHist("SoverBtotal")->SetBinContent(fCurrentMethodIdx+1, normSig/normBkg);

      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         const Event* ev = Data()->GetEvent(ievt);

         if (ev->GetClass()) ev->ScaleBoostWeight(oldSum/normSig/2);
         else                ev->ScaleBoostWeight(oldSum/normBkg/2);
      }
   }

   delete[] WrongDetection;
   if (MVAProb) delete MVAProb;

   fBoostWeight = boostWeight; // used ONLY for the monitoring tree

   return boostWeight;
}
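////////////////////////////////////////////////////////////////////////////////
/// Bagging: resample the training sample by drawing a Poisson-distributed
/// boost weight (mean Boost_BaggedSampleFraction) for every event.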
Double_t TMVA::MethodBoost::Bagging()
{
   TRandom3* trandom = new TRandom3(fRandomSeed + fMethods.size());
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = Data()->GetEvent(ievt);
      ev->SetBoostWeight(trandom->PoissonD(fBaggedSampleFraction));
   }
   delete trandom;
   fBoostWeight = 1; // used ONLY for the monitoring tree
   return 1.;
}
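////////////////////////////////////////////////////////////////////////////////
/// Get help message text.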
void TMVA::MethodBoost::GetHelpMessage() const
{
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << "This method combines several classifiers of one species in a " << Endl;
   Log() << "single multivariate quantity via the boost algorithm." << Endl;
   Log() << "The output is a weighted sum over all individual classifiers." << Endl;
   Log() << "By default, the AdaBoost method is employed, which gives " << Endl;
   Log() << "events that were misclassified by the previous classifier a larger " << Endl;
   Log() << "weight in the training of the following classifier." << Endl;
   Log() << "Optionally, bagged boosting can also be applied." << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << "The most important parameters in the configuration are the " << Endl;
   Log() << "number of boosts applied (Boost_Num) and the choice of boosting " << Endl;
   Log() << "(Boost_Type), which can be set to either AdaBoost or Bagging." << Endl;
   Log() << "AdaBoosting: the most important parameter in this configuration " << Endl;
   Log() << "is the beta parameter (Boost_AdaBoostBeta). " << Endl;
   Log() << "When boosting a linear classifier, it is sometimes advantageous " << Endl;
   Log() << "to transform the MVA output non-linearly. The following options " << Endl;
   Log() << "are available: step, linear, log, and gauss; the default is step." << Endl;
   Log() << "Some classifiers are hard to boost and do not improve much in " << Endl;
   Log() << "their performance by boosting them; some even slightly deteriorate " << Endl;
   Log() << "due to the boosting." << Endl;
   Log() << "The booking of the boost method is special since it requires " << Endl;
   Log() << "the booking of the method to be boosted and the boost itself." << Endl;
   Log() << "This is solved by booking the method to be boosted and adding " << Endl;
   Log() << "all boost parameters, which all begin with \"Boost_\", to the " << Endl;
   Log() << "options string. The factory separates the options and initiates " << Endl;
   Log() << "the boost process. The TMVA macro directory contains the example " << Endl;
   Log() << "macro \"Boost.C\"." << Endl;
}
const TMVA::Ranking* TMVA::MethodBoost::CreateRanking()
{
   return 0;
}
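////////////////////////////////////////////////////////////////////////////////
/// Return the boosted MVA response: the weighted sum over the (optionally
/// transformed) responses y_i of the individual classifiers,
///
///    y = ( Sum_i w_i * T(y_i) ) / Sum_i w_i ,
///
/// where T is the transform selected via Boost_Transform (step, linear, log
/// or gauss), applied relative to each classifier's signal reference cut.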
Double_t TMVA::MethodBoost::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   Double_t mvaValue = 0;
   Double_t norm = 0;
   Double_t epsilon = TMath::Exp(-1.);
   for (UInt_t i=0; i<fMethods.size(); i++){
      MethodBase* m = dynamic_cast<MethodBase*>(fMethods[i]);
      if (m==0) continue;
      Double_t val = fTmpEvent ? m->GetMvaValue(fTmpEvent) : m->GetMvaValue();
      Double_t sigcut = m->GetSignalReferenceCut();

      // default is no transform
      if (fTransformString == "linear"){
      }
      else if (fTransformString == "log"){
         if (val < sigcut) val = sigcut;
         val = TMath::Log((val-sigcut)+epsilon);
      }
      else if (fTransformString == "step"){
         if (m->IsSignalLike(val)) val = 1.;
         else val = -1.;
      }
      else if (fTransformString == "gauss"){
         val = TMath::Gaus((val-sigcut),1);
      }
      else {
         Log() << kFATAL << "error unknown transformation " << fTransformString << Endl;
      }
      mvaValue += val*fMethodWeight[i];
      norm     += fMethodWeight[i];
   }
   mvaValue /= norm;
   // cannot determine error
   NoErrorCalc(err, errUpper);

   return mvaValue;
}
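////////////////////////////////////////////////////////////////////////////////
/// Calculate the ROC integral of a single classifier or of the boosted method
/// (depending on "singleMethod") on the training or testing sample; on request
/// ("CalcOverlapIntegral") the overlap integral of the signal and background
/// MVA distributions is calculated as well and stored in fOverlap_integral.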
Double_t TMVA::MethodBoost::GetBoostROCIntegral(Bool_t singleMethod, Types::ETreeType eTT, Bool_t CalcOverlapIntegral)
{
   // set data sample (training / testing)
   Data()->SetCurrentType(eTT);

   MethodBase* method = singleMethod ? dynamic_cast<MethodBase*>(fMethods.back()) : 0;
   if (singleMethod && !method) {
      Log() << kFATAL << " What do you do? Your method:"
            << fMethods.back()->GetName()
            << " seems not to be a proper TMVA method"
            << Endl;
   }
   Double_t err = 0.0;

   // temporarily set normalized classifier weights; otherwise the MVA value of
   // the boosted method would have to be divided by the normalization factor
   std::vector<Double_t> OldMethodWeight(fMethodWeight);
   if (!singleMethod) {
      // calculate the sum of weights of all methods
      Double_t AllMethodsWeight = 0;
      for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
         AllMethodsWeight += fMethodWeight.at(i);
      // normalize the weights of the classifiers
      if (AllMethodsWeight != 0.0) {
         for (UInt_t i=0; i<=fCurrentMethodIdx; i++)
            fMethodWeight[i] /= AllMethodsWeight;
      }
   }

   // calculate the MVA values
   Double_t meanS, meanB, rmsS, rmsB, xmin, xmax, nrms = 10;
   std::vector<Float_t>* mvaRes;
   if (singleMethod && eTT==Types::kTraining)
      mvaRes = fMVAvalues; // values already calculated
   else {
      mvaRes = new std::vector<Float_t>(GetNEvents());
      for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
         GetEvent(ievt);
         (*mvaRes)[ievt] = singleMethod ? method->GetMvaValue(&err) : GetMvaValue(&err);
      }
   }

   // restore the original method weights
   fMethodWeight = OldMethodWeight;
   // now create histograms for the calculation of the ROC integral
   Int_t signalClass = 0;
   if (DataInfo().GetClassInfo("Signal") != 0) {
      signalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
   }
   gTools().ComputeStat( GetEventCollection(eTT), mvaRes,
                         meanS, meanB, rmsS, rmsB, xmin, xmax, signalClass );

   fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
   xmin = TMath::Max( TMath::Min(meanS - nrms*rmsS, meanB - nrms*rmsB ), xmin );
   xmax = TMath::Min( TMath::Max(meanS + nrms*rmsS, meanB + nrms*rmsB ), xmax ) + 0.0001;

   // fill the histograms
   TH1* mva_s = new TH1F( "MVA_S", "MVA_S", fNbins, xmin, xmax );
   TH1* mva_b = new TH1F( "MVA_B", "MVA_B", fNbins, xmin, xmax );
   TH1 *mva_s_overlap=0, *mva_b_overlap=0;
   if (CalcOverlapIntegral) {
      mva_s_overlap = new TH1F( "MVA_S_OVERLAP", "MVA_S_OVERLAP", fNbins, xmin, xmax );
      mva_b_overlap = new TH1F( "MVA_B_OVERLAP", "MVA_B_OVERLAP", fNbins, xmin, xmax );
   }
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      const Event* ev = GetEvent(ievt);
      Float_t w = (eTT==Types::kTesting ? ev->GetWeight() : ev->GetOriginalWeight());
      if (DataInfo().IsSignal(ev)) mva_s->Fill( (*mvaRes)[ievt], w );
      else                         mva_b->Fill( (*mvaRes)[ievt], w );

      if (CalcOverlapIntegral) {
         Float_t w_ov = ev->GetWeight();
         if (DataInfo().IsSignal(ev))
            mva_s_overlap->Fill( (*mvaRes)[ievt], w_ov );
         else
            mva_b_overlap->Fill( (*mvaRes)[ievt], w_ov );
      }
   }
   gTools().NormHist( mva_s );
   gTools().NormHist( mva_b );
   PDF* fS = new PDF( "PDF Sig", mva_s, PDF::kSpline2 );
   PDF* fB = new PDF( "PDF Bkg", mva_b, PDF::kSpline2 );

   // calculate the ROC integral from fS, fB
   Double_t ROC = MethodBase::GetROCIntegral(fS, fB);

   // calculate the overlap integral
   if (CalcOverlapIntegral) {
      gTools().NormHist( mva_s_overlap );
      gTools().NormHist( mva_b_overlap );

      fOverlap_integral = 0.0;
      for (Int_t bin=1; bin<=mva_s_overlap->GetNbinsX(); bin++){
         Double_t bc_s = mva_s_overlap->GetBinContent(bin);
         Double_t bc_b = mva_b_overlap->GetBinContent(bin);
         if (bc_s > 0.0 && bc_b > 0.0)
            fOverlap_integral += TMath::Min(bc_s, bc_b);
      }

      delete mva_s_overlap;
      delete mva_b_overlap;
   }

   delete mva_s;
   delete mva_b;
   delete fS;
   delete fB;
   if (!(singleMethod && eTT==Types::kTraining)) delete mvaRes;

   Data()->SetCurrentType(Types::kTraining);

   return ROC;
}
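////////////////////////////////////////////////////////////////////////////////
/// Calculate the MVA values of the last booked method for all training events
/// and cache them in fMVAvalues.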
void TMVA::MethodBoost::CalcMVAValues()
{
   Data()->SetCurrentType(Types::kTraining);
   MethodBase* method = dynamic_cast<MethodBase*>(fMethods.back());
   if (!method) {
      Log() << kFATAL << "dynamic cast to MethodBase* failed" << Endl;
      return;
   }
   // calculate MVA values
   for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
      GetEvent(ievt);
      fMVAvalues->at(ievt) = method->GetMvaValue();
   }
}
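////////////////////////////////////////////////////////////////////////////////
/// Fill the method-specific monitoring histograms (e.g. node counts for
/// decision trees) depending on the current boost stage.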
void TMVA::MethodBoost::MonitorBoost( Types::EBoostStage stage, UInt_t methodIndex )
{
   Results* results = Data()->GetResults(GetMethodName(), Types::kTraining, GetAnalysisType());

   if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kDT) {
      TMVA::MethodDT* currentDT = dynamic_cast<TMVA::MethodDT*>(GetCurrentMethod(methodIndex));
      if (currentDT) {
         if (stage == Types::kBoostProcBegin){
            results->Store(new TH1I("NodesBeforePruning","nodes before pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesBeforePruning");
            results->Store(new TH1I("NodesAfterPruning","nodes after pruning",this->GetBoostNum(),0,this->GetBoostNum()),"NodesAfterPruning");
         }

         if (stage == Types::kBeforeTraining){
         }
         else if (stage == Types::kBeforeBoosting){
            results->GetHist("NodesBeforePruning")->SetBinContent(methodIndex+1,currentDT->GetNNodesBeforePruning());
            results->GetHist("NodesAfterPruning")->SetBinContent(methodIndex+1,currentDT->GetNNodes());
         }
         else if (stage == Types::kAfterBoosting){
         }
         else if (stage != Types::kBoostProcEnd){
            Log() << kINFO << "<Train> average number of nodes before/after pruning : "
                  << results->GetHist("NodesBeforePruning")->GetMean() << " / "
                  << results->GetHist("NodesAfterPruning")->GetMean()
                  << Endl;
         }
      }
   }
   else if (GetCurrentMethod(methodIndex)->GetMethodType() == TMVA::Types::kFisher) {
      if (stage == Types::kAfterBoosting){
         TMVA::MsgLogger::EnableOutput();
      }
   }
   else {
      if (methodIndex < 3){
         Log() << kDEBUG << "No detailed boost monitoring for "
               << GetCurrentMethod(methodIndex)->GetMethodName()
               << " yet available " << Endl;
      }
   }

   // boosting plots universal for all classifiers (typically for debug purposes only)

   if (stage == Types::kBeforeBoosting){
      // display the weighted events for the 2D case at each boost step
      if (fDetailedMonitoring){
         // the following code is useful only for 2D examples - mainly for debugging
         if (DataInfo().GetNVariables() == 2) {
            results->Store(new TH2F(Form("EventDistSig_%d",methodIndex),Form("EventDistSig_%d",methodIndex),100,0,7,100,0,7));
            results->GetHist(Form("EventDistSig_%d",methodIndex))->SetMarkerColor(4);
            results->Store(new TH2F(Form("EventDistBkg_%d",methodIndex),Form("EventDistBkg_%d",methodIndex),100,0,7,100,0,7));
            results->GetHist(Form("EventDistBkg_%d",methodIndex))->SetMarkerColor(2);

            Data()->SetCurrentType(Types::kTraining);
            for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
               const Event* ev = GetEvent(ievt);
               Float_t w  = ev->GetWeight();
               Float_t v0 = ev->GetValue(0);
               Float_t v1 = ev->GetValue(1);
               TH2* h;
               if (DataInfo().IsSignal(ev)) h = results->GetHist2D(Form("EventDistSig_%d",methodIndex));
               else                         h = results->GetHist2D(Form("EventDistBkg_%d",methodIndex));
               if (h) h->Fill(v0,v1,w);