REGISTER_METHOD(Cuts)

ClassImp(TMVA::MethodCuts);

const Double_t TMVA::MethodCuts::fgMaxAbsCutVal = 1.0e30;
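// Illustrative booking of this method (a sketch, not taken from this file; the exact
// Factory/DataLoader signature depends on the ROOT/TMVA version in use). The option
// names correspond to those declared in DeclareOptions() below:
//
//    factory->BookMethod( dataloader, TMVA::Types::kCuts, "Cuts",
//                         "FitMethod=GA:EffMethod=EffSel:VarProp=FSmart" );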
TMVA::MethodCuts::MethodCuts( const TString& jobName,
                              const TString& methodTitle,
                              DataSetInfo& theData,
                              const TString& theOption ) :
   MethodBase( jobName, Types::kCuts, methodTitle, theData, theOption ),
   fFitMethod      ( kUseGeneticAlgorithm ),
   fEffMethod      ( kUseEventSelection ),
   fVarHistS_smooth( 0 ),
   fVarHistB_smooth( 0 ),
   fNegEffWarning  ( kFALSE )
{
}
TMVA::MethodCuts::MethodCuts( DataSetInfo& theData,
                              const TString& theWeightFile ) :
   MethodBase( Types::kCuts, theData, theWeightFile ),
   fFitMethod      ( kUseGeneticAlgorithm ),
   fEffMethod      ( kUseEventSelection ),
   fVarHistS_smooth( 0 ),
   fVarHistB_smooth( 0 ),
   fNegEffWarning  ( kFALSE )
{
}
Bool_t TMVA::MethodCuts::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses,
                                          UInt_t /*numberTargets*/ )
{
   // Cuts can only handle two-class classification
   return (type == Types::kClassification && numberClasses == 2);
}
void TMVA::MethodCuts::Init( void )
{
   fVarHistS        = fVarHistB        = 0;
   fVarHistS_smooth = fVarHistB_smooth = 0;
   fVarPdfS         = fVarPdfB         = 0;

   fBinaryTreeS = fBinaryTreeB = 0;

   fRangeSign = new std::vector<Int_t>( GetNvar() );
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*fRangeSign)[ivar] = +1;

   fMeanS = new std::vector<Double_t>( GetNvar() );
   fMeanB = new std::vector<Double_t>( GetNvar() );
   fRmsS  = new std::vector<Double_t>( GetNvar() );
   fRmsB  = new std::vector<Double_t>( GetNvar() );

   fFitParams = new std::vector<EFitParameters>( GetNvar() );
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*fFitParams)[ivar] = kNotEnforced;

   fFitMethod = kUseMonteCarlo;

   fCutMin = new Double_t*[GetNvar()];
   fCutMax = new Double_t*[GetNvar()];
   for (UInt_t i=0; i<GetNvar(); i++) {
      fCutMin[i] = new Double_t[fNbins];
      fCutMax[i] = new Double_t[fNbins];
   }

   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      for (Int_t ibin=0; ibin<fNbins; ibin++) {
         fCutMin[ivar][ibin] = 0;
         fCutMax[ivar][ibin] = 0;
      }
   }

   fTmpCutMin = new Double_t[GetNvar()];
   fTmpCutMax = new Double_t[GetNvar()];
}
TMVA::MethodCuts::~MethodCuts( void )
{
   delete fEffBvsSLocal;

   if (NULL != fCutRangeMin) delete [] fCutRangeMin;
   if (NULL != fCutRangeMax) delete [] fCutRangeMax;
   if (NULL != fAllVarsI)    delete [] fAllVarsI;

   for (UInt_t i=0; i<GetNvar(); i++) {
      if (NULL != fCutMin[i]  ) delete [] fCutMin[i];
      if (NULL != fCutMax[i]  ) delete [] fCutMax[i];
      if (NULL != fCutRange[i]) delete fCutRange[i];
   }

   if (NULL != fCutMin) delete [] fCutMin;
   if (NULL != fCutMax) delete [] fCutMax;

   if (NULL != fTmpCutMin) delete [] fTmpCutMin;
   if (NULL != fTmpCutMax) delete [] fTmpCutMax;

   if (NULL != fBinaryTreeS) delete fBinaryTreeS;
   if (NULL != fBinaryTreeB) delete fBinaryTreeB;
}
void TMVA::MethodCuts::DeclareOptions()
{
   DeclareOptionRef(fFitMethodS = "GA", "FitMethod",
                    "Minimisation Method (GA, SA, and MC are the primary methods to be used; the others have been introduced for testing purposes and are deprecated)");
   AddPreDefVal(TString("GA"));
   AddPreDefVal(TString("SA"));
   AddPreDefVal(TString("MC"));
   AddPreDefVal(TString("MCEvents"));
   AddPreDefVal(TString("MINUIT"));
   AddPreDefVal(TString("EventScan"));

   DeclareOptionRef(fEffMethodS = "EffSel", "EffMethod", "Selection Method");
   AddPreDefVal(TString("EffSel"));
   AddPreDefVal(TString("EffPDF"));

   fCutRange.resize(GetNvar());
   fCutRangeMin = new Double_t[GetNvar()];
   fCutRangeMax = new Double_t[GetNvar()];
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      fCutRangeMin[ivar] = fCutRangeMax[ivar] = -1;
   }

   DeclareOptionRef( fCutRangeMin, GetNvar(), "CutRangeMin", "Minimum of allowed cut range (set per variable)" );
   DeclareOptionRef( fCutRangeMax, GetNvar(), "CutRangeMax", "Maximum of allowed cut range (set per variable)" );

   fAllVarsI = new TString[GetNvar()];
   for (UInt_t i=0; i<GetNvar(); i++) fAllVarsI[i] = "NotEnforced";

   DeclareOptionRef(fAllVarsI, GetNvar(), "VarProp", "Categorisation of cuts");
   AddPreDefVal(TString("NotEnforced"));
   AddPreDefVal(TString("FMax"));
   AddPreDefVal(TString("FMin"));
   AddPreDefVal(TString("FSmart"));
}
void TMVA::MethodCuts::ProcessOptions()
{
   if (IsNormalised()) {
      Log() << kWARNING << "Normalisation of the input variables for cut optimisation is not" << Endl;
      Log() << kWARNING << "supported because it leads to non-transparent cut values, and no" << Endl;
      Log() << kWARNING << "improvement in the performance of the algorithm." << Endl;
      Log() << kWARNING << "Please remove \"Normalise\" option from booking option string" << Endl;
      Log() << kWARNING << "==> Will reset normalisation flag to \"False\"" << Endl;
      SetNormalised( kFALSE );
   }

   if (IgnoreEventsWithNegWeightsInTraining()) {
      Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
            << GetMethodTypeName()
            << " --> Please remove \"IgnoreNegWeightsInTraining\" option from booking string."
            << Endl;
   }
   if      (fFitMethodS == "MC"      ) fFitMethod = kUseMonteCarlo;
   else if (fFitMethodS == "MCEvents") fFitMethod = kUseMonteCarloEvents;
   else if (fFitMethodS == "GA"      ) fFitMethod = kUseGeneticAlgorithm;
   else if (fFitMethodS == "SA"      ) fFitMethod = kUseSimulatedAnnealing;
   else if (fFitMethodS == "MINUIT"  ) {
      fFitMethod = kUseMinuit;
      Log() << kWARNING << "poor performance of MINUIT in MethodCuts; preferred fit method: GA" << Endl;
   }
   else if (fFitMethodS == "EventScan") fFitMethod = kUseEventScan;
   else Log() << kFATAL << "unknown minimisation method: " << fFitMethodS << Endl;
   if      (fEffMethodS == "EffSel") fEffMethod = kUseEventSelection;
   else if (fEffMethodS == "EffPDF") fEffMethod = kUsePDFs;
   else                              fEffMethod = kUseEventSelection;
   Log() << kINFO << Form("Use optimization method: \"%s\"",
                          (fFitMethod == kUseMonteCarlo      ) ? "Monte Carlo" :
                          (fFitMethod == kUseMonteCarloEvents) ? "Monte-Carlo-Event sampling" :
                          (fFitMethod == kUseEventScan       ) ? "Full Event Scan (slow)" :
                          (fFitMethod == kUseMinuit          ) ? "MINUIT" : "Genetic Algorithm") << Endl;
   Log() << kINFO << Form("Use efficiency computation method: \"%s\"",
                          (fEffMethod == kUseEventSelection) ? "Event Selection" : "PDF") << Endl;
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      fCutRange[ivar] = new Interval( fCutRangeMin[ivar], fCutRangeMax[ivar] );
   }

   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      EFitParameters theFitP = kNotEnforced;
      if      (fAllVarsI[ivar] == "" || fAllVarsI[ivar] == "NotEnforced") theFitP = kNotEnforced;
      else if (fAllVarsI[ivar] == "FMax"  )                               theFitP = kForceMax;
      else if (fAllVarsI[ivar] == "FMin"  )                               theFitP = kForceMin;
      else if (fAllVarsI[ivar] == "FSmart")                               theFitP = kForceSmart;
      else {
         Log() << kFATAL << "unknown value \'" << fAllVarsI[ivar]
               << "\' for fit parameter option " << Form("VarProp[%i]",ivar) << Endl;
      }
      (*fFitParams)[ivar] = theFitP;

      if (theFitP != kNotEnforced)
         Log() << kINFO << "Use \"" << fAllVarsI[ivar]
               << "\" cuts for variable: " << "'" << (*fInputVars)[ivar] << "'" << Endl;
   }
}
Double_t TMVA::MethodCuts::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   // cannot determine error
   NoErrorCalc(err, errUpper);

   // sanity check
   if (fCutMin == NULL || fCutMax == NULL || fNbins == 0) {
      Log() << kFATAL << "<Eval_Cuts> fCutMin/Max have zero pointer. "
            << "Did you book Cuts ?" << Endl;
   }

   const Event* ev = GetEvent();

   if (fTestSignalEff > 0) {
      // get the bin corresponding to the requested signal efficiency
      Int_t ibin = fEffBvsSLocal->FindBin( fTestSignalEff );
      if      (ibin < 0      ) ibin = 0;
      else if (ibin >= fNbins) ibin = fNbins - 1;

      Bool_t passed = kTRUE;
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
         passed &= ( (ev->GetValue(ivar) >  fCutMin[ivar][ibin]) &&
                     (ev->GetValue(ivar) <= fCutMax[ivar][ibin]) );

      return passed ? 1. : 0. ;
   }

   return 0;
}
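// Illustrative application-side use (a sketch; it assumes the standard TMVA::Reader interface,
// where the auxiliary argument of EvaluateMVA() is interpreted as the requested signal efficiency
// for the Cuts method, and "var1" / the weight-file path are placeholders):
//
//    Float_t var1;
//    TMVA::Reader reader;
//    reader.AddVariable( "var1", &var1 );
//    reader.BookMVA( "Cuts", "weights/TMVAClassification_Cuts.weights.xml" );
//    Bool_t passed = reader.EvaluateMVA( "Cuts", 0.7 ) > 0.5;   // cuts for 70% signal efficiency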
void TMVA::MethodCuts::PrintCuts( Double_t effS ) const
{
   std::vector<Double_t> cutsMin;
   std::vector<Double_t> cutsMax;
   Int_t ibin = fEffBvsSLocal->FindBin( effS );

   Double_t trueEffS = GetCuts( effS, cutsMin, cutsMax );

   // retrieve the variable expressions (may be transformed quantities)
   std::vector<TString>* varVec = 0;
   if (GetTransformationHandler().GetNumOfTransformations() == 0) {
      // no transformation applied: use the original variable labels
      varVec = new std::vector<TString>;
      for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
         varVec->push_back( DataInfo().GetVariableInfo(ivar).GetLabel() );
      }
   }
   else if (GetTransformationHandler().GetNumOfTransformations() == 1) {
      // single transformation: use its transformation strings
      varVec = GetTransformationHandler().GetTransformationStringsOfLastTransform();
   }
   else {
      // several transformations: indicate that the cuts act on transformed quantities
      varVec = new std::vector<TString>;
      for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
         varVec->push_back( DataInfo().GetVariableInfo(ivar).GetLabel() + " [transformed]" );
      }
   }

   UInt_t maxL = 0;
   for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
      if ((UInt_t)(*varVec)[ivar].Length() > maxL) maxL = (*varVec)[ivar].Length();
   }
   UInt_t maxLine = 20+maxL+16;

   for (UInt_t i=0; i<maxLine; i++) Log() << "-";
   Log() << Endl;
   Log() << kHEADER << "Cut values for requested signal efficiency: " << trueEffS << Endl;
   Log() << kINFO   << "Corresponding background efficiency : " << fEffBvsSLocal->GetBinContent( ibin ) << Endl;
   if (GetTransformationHandler().GetNumOfTransformations() == 1) {
      Log() << kINFO << "Transformation applied to input variables : \""
            << GetTransformationHandler().GetNameOfLastTransform() << "\"" << Endl;
   }
   else if (GetTransformationHandler().GetNumOfTransformations() > 1) {
      Log() << kINFO << "[ More than one (=" << GetTransformationHandler().GetNumOfTransformations() << ") "
            << " transformations applied in transformation chain; cuts applied on transformed quantities ] " << Endl;
   }
   else {
      Log() << kINFO << "Transformation applied to input variables : None" << Endl;
   }
   for (UInt_t i=0; i<maxLine; i++) Log() << "-";
   Log() << Endl;
   for (UInt_t ivar=0; ivar<cutsMin.size(); ivar++) {
      Log() << kINFO
            << "Cut[" << std::setw(2) << ivar << "]: "
            << std::setw(10) << cutsMin[ivar]
            << " < "
            << std::setw(maxL) << (*varVec)[ivar]
            << " <= "
            << std::setw(10) << cutsMax[ivar] << Endl;
   }
   for (UInt_t i=0; i<maxLine; i++) Log() << "-";
   Log() << Endl;
}
Double_t TMVA::MethodCuts::GetCuts( Double_t effS, Double_t* cutMin, Double_t* cutMax ) const
{
   std::vector<Double_t> cMin( GetNvar() );
   std::vector<Double_t> cMax( GetNvar() );
   Double_t trueEffS = GetCuts( effS, cMin, cMax );
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      cutMin[ivar] = cMin[ivar];
      cutMax[ivar] = cMax[ivar];
   }
   return trueEffS;
}
Double_t TMVA::MethodCuts::GetCuts( Double_t effS,
                                    std::vector<Double_t>& cutMin,
                                    std::vector<Double_t>& cutMax ) const
{
   // find the corresponding bin
   Int_t ibin = fEffBvsSLocal->FindBin( effS );

   // the true efficiency is the lower edge of the bin
   Double_t trueEffS = fEffBvsSLocal->GetBinLowEdge( ibin );

   if      (ibin < 0      ) ibin = 0;
   else if (ibin >= fNbins) ibin = fNbins - 1;

   cutMin.clear();
   cutMax.clear();
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      cutMin.push_back( fCutMin[ivar][ibin] );
      cutMax.push_back( fCutMax[ivar][ibin] );
   }

   return trueEffS;
}
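// Illustrative retrieval of the optimised cut window for a given signal efficiency (a sketch;
// "methodCuts" stands for any pointer to this class obtained by the caller):
//
//    std::vector<Double_t> cutMin, cutMax;
//    Double_t trueEffS = methodCuts->GetCuts( 0.7, cutMin, cutMax );
//    // an event passes if cutMin[ivar] < value[ivar] <= cutMax[ivar] for every variable,
//    // cf. the comparison convention used in GetMvaValue()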
void TMVA::MethodCuts::Train( void )
{
   if (fEffMethod == kUsePDFs) CreateVariablePDFs(); // create PDFs for variables

   // create binary search trees (class members) for signal and background
   if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
   if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }

   fBinaryTreeS = new BinarySearchTree();
   fBinaryTreeS->Fill( GetEventCollection(Types::kTraining), fSignalClass );
   fBinaryTreeB = new BinarySearchTree();
   fBinaryTreeB->Fill( GetEventCollection(Types::kTraining), fBackgroundClass );

   for (UInt_t ivar=0; ivar<Data()->GetNVariables(); ivar++) {
      (*fMeanS)[ivar] = fBinaryTreeS->Mean(Types::kSignal,     ivar);
      (*fRmsS)[ivar]  = fBinaryTreeS->RMS (Types::kSignal,     ivar);
      (*fMeanB)[ivar] = fBinaryTreeB->Mean(Types::kBackground, ivar);
      (*fRmsB)[ivar]  = fBinaryTreeB->RMS (Types::kBackground, ivar);

      // default: set the cut range to the event min/max of signal and background
      Double_t xmin = TMath::Min(fBinaryTreeS->Min(Types::kSignal,     ivar),
                                 fBinaryTreeB->Min(Types::kBackground, ivar));
      Double_t xmax = TMath::Max(fBinaryTreeS->Max(Types::kSignal,     ivar),
                                 fBinaryTreeB->Max(Types::kBackground, ivar));

      // pad the range by a small epsilon
      Double_t eps = 0.01*(xmax - xmin);
      xmin -= eps;
      xmax += eps;

      if (TMath::Abs(fCutRange[ivar]->GetMin() - fCutRange[ivar]->GetMax()) < 1.0e-300 ) {
         fCutRange[ivar]->SetMin( xmin );
         fCutRange[ivar]->SetMax( xmax );
      }
      else if (xmin > fCutRange[ivar]->GetMin()) fCutRange[ivar]->SetMin( xmin );
      else if (xmax < fCutRange[ivar]->GetMax()) fCutRange[ivar]->SetMax( xmax );
   }
   std::vector<TH1F*> signalDist, bkgDist;

   delete fEffBvsSLocal;
   fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
                             TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
   fEffBvsSLocal->SetDirectory(0);

   for (Int_t ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );

   if (fFitMethod == kUseGeneticAlgorithm ||
       fFitMethod == kUseMonteCarlo       ||
       fFitMethod == kUseMinuit           ||
       fFitMethod == kUseSimulatedAnnealing) {
      // fit ranges: each variable contributes two parameters, the cut position and the window width
      std::vector<Interval*> ranges;

      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {

         Int_t nbins = 0;
         if (DataInfo().GetVariableInfo(ivar).GetVarType() == 'I') {
            nbins = Int_t(fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin()) + 1;
         }

         if ((*fFitParams)[ivar] == kForceSmart) {
            if ((*fMeanS)[ivar] > (*fMeanB)[ivar]) (*fFitParams)[ivar] = kForceMax;
            else                                   (*fFitParams)[ivar] = kForceMin;
         }

         if ((*fFitParams)[ivar] == kForceMin) {
            ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMin(), nbins ) );
            ranges.push_back( new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
         }
         else if ((*fFitParams)[ivar] == kForceMax) {
            ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
            ranges.push_back( new Interval( fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(),
                                            fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
         }
         else {
            ranges.push_back( new Interval( fCutRange[ivar]->GetMin(), fCutRange[ivar]->GetMax(), nbins ) );
            ranges.push_back( new Interval( 0, fCutRange[ivar]->GetMax() - fCutRange[ivar]->GetMin(), nbins ) );
         }
      }
      // create the fitter
      FitterBase* fitter = NULL;

      switch (fFitMethod) {
      case kUseGeneticAlgorithm:
         fitter = new GeneticFitter( *this, Form("%sFitter_GA", GetName()), ranges, GetOptions() );
         break;
      case kUseMonteCarlo:
         fitter = new MCFitter( *this, Form("%sFitter_MC", GetName()), ranges, GetOptions() );
         break;
      case kUseMinuit:
         fitter = new MinuitFitter( *this, Form("%sFitter_MINUIT", GetName()), ranges, GetOptions() );
         break;
      case kUseSimulatedAnnealing:
         fitter = new SimulatedAnnealingFitter( *this, Form("%sFitter_SA", GetName()), ranges, GetOptions() );
         break;
      default:
         Log() << kFATAL << "Wrong fit method: " << fFitMethod << Endl;
      }

      if (fInteractive) fitter->SetIPythonInteractive(&fExitFromTraining, &fIPyMaxIter, &fIPyCurrentIter);

      fitter->CheckForUnusedOptions();

      // perform the fit
      fitter->Run();

      // clean up
      for (UInt_t ivar=0; ivar<ranges.size(); ivar++) delete ranges[ivar];
   }
   else if (fFitMethod == kUseEventScan) {

      Int_t nevents = Data()->GetNEvents();
      Int_t ic = 0;

      // timing of the scan over all event pairs
      Int_t nsamples = Int_t(0.5*nevents*(nevents - 1));
      Timer timer( nsamples, GetName() );
      fIPyMaxIter = nsamples;

      Log() << kINFO << "Running full event scan: " << Endl;
      for (Int_t ievt1=0; ievt1<nevents; ievt1++) {
         for (Int_t ievt2=ievt1+1; ievt2<nevents; ievt2++) {

            fIPyCurrentIter = ic;
            if (fExitFromTraining) break;

            EstimatorFunction( ievt1, ievt2 );

            // progress counter
            ic++;
            if ((nsamples<10000) || ic%10000 == 0) timer.DrawProgressBar( ic );
         }
      }
   }
   else if (fFitMethod == kUseMonteCarloEvents) {

      Int_t  nsamples = 200000;
      UInt_t seed     = 100;
      DeclareOptionRef( nsamples, "SampleSize", "Number of Monte-Carlo-Event samples" );
      DeclareOptionRef( seed,     "Seed",       "Seed for the random generator (0 takes random seeds)" );

      Int_t nevents = Data()->GetNEvents();
      Int_t ic = 0;

      // timing of MC
      Timer timer( nsamples, GetName() );
      fIPyMaxIter = nsamples;

      // random generator
      TRandom3* rnd = new TRandom3( seed );

      Log() << kINFO << "Running Monte-Carlo-Event sampling over " << nsamples << " events" << Endl;
      std::vector<Double_t> pars( 2*GetNvar() );

      for (Int_t itoy=0; itoy<nsamples; itoy++) {
         fIPyCurrentIter = ic;
         if (fExitFromTraining) break;

         for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {

            // draw a pair of signal events; their values define the cut window for this variable
            Bool_t   isSignal = kFALSE;
            Int_t    ievt1 = 0, ievt2 = 0;
            Int_t    nbreak = 0;
            Double_t evt1 = 0., evt2 = 0.;
            while (!isSignal) {
               ievt1 = Int_t(rnd->Uniform(0.,1.)*nevents);
               ievt2 = Int_t(rnd->Uniform(0.,1.)*nevents);

               const Event *ev1 = GetEvent(ievt1);
               isSignal = DataInfo().IsSignal(ev1);
               evt1 = ev1->GetValue( ivar );

               const Event *ev2 = GetEvent(ievt2);
               isSignal &= DataInfo().IsSignal(ev2);
               evt2 = ev2->GetValue( ivar );

               if (nbreak++ > 10000) {
                  Log() << kFATAL << "<MCEvents>: could not find signal events"
                        << " after 10000 trials - do you have signal events in your sample ?"
                        << Endl;
               }
            }

            // sort the pair
            if (evt1 > evt2) { Double_t z = evt1; evt1 = evt2; evt2 = z; }
            pars[2*ivar]   = evt1;
            pars[2*ivar+1] = evt2 - evt1;
         }

         // compute estimator
         EstimatorFunction( pars );

         // progress counter
         ic++;
         if ((nsamples<1000) || ic%1000 == 0) timer.DrawProgressBar( ic );
      }
   }
   else Log() << kFATAL << "Unknown minimisation method: " << fFitMethod << Endl;

   if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
   if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }

   // for enforced cuts, open the non-optimised side of the cut window
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      for (Int_t ibin=0; ibin<fNbins; ibin++) {

         if ((*fFitParams)[ivar] == kForceMin && fCutMin[ivar][ibin] > -fgMaxAbsCutVal) {
            fCutMin[ivar][ibin] = -fgMaxAbsCutVal;
         }
         if ((*fFitParams)[ivar] == kForceMax && fCutMax[ivar][ibin] < fgMaxAbsCutVal) {
            fCutMax[ivar][ibin] = fgMaxAbsCutVal;
         }
      }
   }

   // print cuts for a few signal efficiencies; the small epsilon makes sure the requested
   // efficiency falls inside the intended bin
   Double_t epsilon = 0.0001;
   for (Double_t eff=0.1; eff<0.95; eff += 0.1) PrintCuts( eff+epsilon );

   if (!fExitFromTraining) fIPyMaxIter = fIPyCurrentIter;
}
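// After training, the result consists of the fNbins x GetNvar() tables fCutMin/fCutMax (one cut
// window per variable and per signal-efficiency bin) together with the local background-vs-signal
// efficiency histogram fEffBvsSLocal, both of which are filled by ComputeEstimator().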
void TMVA::MethodCuts::TestClassification()
{
   // nothing to test here
}
Double_t TMVA::MethodCuts::EstimatorFunction( Int_t ievt1, Int_t ievt2 )
{
   const Event *ev1 = GetEvent(ievt1);
   if (!DataInfo().IsSignal(ev1)) return -1;

   const Event *ev2 = GetEvent(ievt2);
   if (!DataInfo().IsSignal(ev2)) return -1;

   const Int_t nvar = GetNvar();
   Double_t* evt1 = new Double_t[nvar];
   Double_t* evt2 = new Double_t[nvar];

   for (Int_t ivar=0; ivar<nvar; ivar++) {
      evt1[ivar] = ev1->GetValue( ivar );
      evt2[ivar] = ev2->GetValue( ivar );
   }

   // determine cuts: the two event values span the cut window in each variable
   std::vector<Double_t> pars;
   for (Int_t ivar=0; ivar<nvar; ivar++) {
      Double_t cutMin, cutMax;
      if (evt1[ivar] < evt2[ivar]) {
         cutMin = evt1[ivar];
         cutMax = evt2[ivar];
      }
      else {
         cutMin = evt2[ivar];
         cutMax = evt1[ivar];
      }

      pars.push_back( cutMin );
      pars.push_back( cutMax - cutMin );
   }

   delete [] evt1;
   delete [] evt2;

   return ComputeEstimator( pars );
}

Double_t TMVA::MethodCuts::EstimatorFunction( std::vector<Double_t>& pars )
{
   return ComputeEstimator( pars );
}
Double_t TMVA::MethodCuts::ComputeEstimator( std::vector<Double_t>& pars )
{
   // translate the fit parameters into cuts
   Double_t effS = 0, effB = 0;
   this->MatchParsToCuts( pars, &fTmpCutMin[0], &fTmpCutMax[0] );

   // retrieve signal and background efficiencies for the given cuts
   switch (fEffMethod) {
   case kUsePDFs:
      this->GetEffsfromPDFs      (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
      break;
   case kUseEventSelection:
      this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
      break;
   default:
      this->GetEffsfromSelection (&fTmpCutMin[0], &fTmpCutMax[0], effS, effB);
   }

   Double_t eta = 0;

   // find the signal-efficiency bin this cut configuration belongs to, and the background
   // efficiencies currently stored in this bin and its neighbours
   Int_t ibinS = fEffBvsSLocal->FindBin( effS );

   Double_t effBH       = fEffBvsSLocal->GetBinContent( ibinS );
   Double_t effBH_left  = (ibinS > 1     ) ? fEffBvsSLocal->GetBinContent( ibinS-1 ) : effBH;
   Double_t effBH_right = (ibinS < fNbins) ? fEffBvsSLocal->GetBinContent( ibinS+1 ) : effBH;

   Double_t average = 0.5*(effBH_left + effBH_right);
   if (effBH < effB) average = effBH;

   // estimator value returned to the fitter
   eta = ( -TMath::Abs(effBH-average) + (1.0 - (effBH - effB)) ) / (1.0 + effS);

   // if this cut configuration is better than the one stored for the bin, replace it
   if (effBH < 0 || effBH > effB) {
      fEffBvsSLocal->SetBinContent( ibinS, effB );
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
         fCutMin[ivar][ibinS-1] = fTmpCutMin[ivar];
         fCutMax[ivar][ibinS-1] = fTmpCutMax[ivar];
      }
   }
   // for the lowest signal-efficiency bin, add a penalty that pushes the cuts away from the
   // range boundaries, so the minimisation does not get stuck in the empty region
   if (ibinS <= 1) {
      Double_t penalty=0., diff=0.;
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
         diff = (fCutRange[ivar]->GetMax()-fTmpCutMax[ivar])/(fCutRange[ivar]->GetMax()-fCutRange[ivar]->GetMin());
         penalty += 4.*diff*diff;
         diff = (fCutRange[ivar]->GetMin()-fTmpCutMin[ivar])/(fCutRange[ivar]->GetMax()-fCutRange[ivar]->GetMin());
         penalty += 4.*diff*diff;
      }

      if (effS < 1.e-4) return 10.0+penalty;
      else              return 10.*(1.-10.*effS);
   }
   return eta;
}
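// Note on the bookkeeping above: every tested cut configuration is mapped to the signal-efficiency
// bin ibinS of fEffBvsSLocal; whenever it yields a smaller background efficiency than the value
// stored for that bin (or the bin is still unfilled), the histogram and the per-bin cut tables
// fCutMin/fCutMax are updated. The returned estimator value only steers the fitter; the cuts that
// are kept come from this per-bin replacement.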
void TMVA::MethodCuts::MatchParsToCuts( const std::vector<Double_t>& pars,
                                        Double_t* cutMin, Double_t* cutMax )
{
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      Int_t ipar = 2*ivar;
      cutMin[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] : pars[ipar] - pars[ipar+1];
      cutMax[ivar] = ((*fRangeSign)[ivar] > 0) ? pars[ipar] + pars[ipar+1] : pars[ipar];
   }
}
void TMVA::MethodCuts::MatchCutsToPars( std::vector<Double_t>& pars,
                                        Double_t** cutMinAll, Double_t** cutMaxAll, Int_t ibin )
{
   if (ibin < 1 || ibin > fNbins) Log() << kFATAL << "::MatchCutsToPars: bin error: "
                                        << ibin << Endl;

   const UInt_t nvar = GetNvar();
   Double_t* cutMin = new Double_t[nvar];
   Double_t* cutMax = new Double_t[nvar];
   for (UInt_t ivar=0; ivar<nvar; ivar++) {
      cutMin[ivar] = cutMinAll[ivar][ibin-1];
      cutMax[ivar] = cutMaxAll[ivar][ibin-1];
   }

   MatchCutsToPars( pars, cutMin, cutMax );

   delete [] cutMin;
   delete [] cutMax;
}
void TMVA::MethodCuts::MatchCutsToPars( std::vector<Double_t>& pars,
                                        Double_t* cutMin, Double_t* cutMax )
{
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      Int_t ipar = 2*ivar;
      pars[ipar]   = ((*fRangeSign)[ivar] > 0) ? cutMin[ivar] : cutMax[ivar];
      pars[ipar+1] = cutMax[ivar] - cutMin[ivar];
   }
}
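// Parameter encoding used by MatchParsToCuts()/MatchCutsToPars(): each variable occupies two
// consecutive fit parameters, pars[2*ivar] holding the cut position (the minimum for a positive
// entry in fRangeSign, otherwise the maximum) and pars[2*ivar+1] holding the width of the cut
// window, so that cutMax - cutMin = pars[2*ivar+1].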
void TMVA::MethodCuts::GetEffsfromPDFs( Double_t* cutMin, Double_t* cutMax,
                                        Double_t& effS, Double_t& effB )
{
   effS = 1.0;
   effB = 1.0;
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      effS *= (*fVarPdfS)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
      effB *= (*fVarPdfB)[ivar]->GetIntegral( cutMin[ivar], cutMax[ivar] );
   }

   // quick fix to prevent negative efficiencies
   if (effS < 0) {
      effS = 0;
      if (!fNegEffWarning) Log() << kWARNING << "Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
      fNegEffWarning = kTRUE;
   }
   if (effB < 0) {
      effB = 0;
      if (!fNegEffWarning) Log() << kWARNING << "Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
      fNegEffWarning = kTRUE;
   }
}
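// Note: multiplying the one-dimensional PDF integrals above treats the input variables as
// uncorrelated; correlations between variables are ignored in this efficiency estimate, which
// is one reason the event-selection method (GetEffsfromSelection) is the default.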
void TMVA::MethodCuts::GetEffsfromSelection( Double_t* cutMin, Double_t* cutMax,
                                             Double_t& effS, Double_t& effB )
{
   Float_t nTotS = 0, nTotB = 0;
   Float_t nSelS = 0, nSelB = 0;

   Volume* volume = new Volume( cutMin, cutMax, GetNvar() );

   // count (weighted) events inside the cut box
   nSelS = fBinaryTreeS->SearchVolume( volume );
   nSelB = fBinaryTreeB->SearchVolume( volume );

   delete volume;

   // total (weighted) number of events used as reference
   nTotS = fBinaryTreeS->GetSumOfWeights();
   nTotB = fBinaryTreeB->GetSumOfWeights();

   // sanity check
   if (nTotS == 0 && nTotB == 0) {
      Log() << kFATAL << "<GetEffsfromSelection> fatal error in zero total number of events:"
            << " nTotS, nTotB: " << nTotS << " " << nTotB << " ***" << Endl;
   }

   // efficiencies
   if (nTotS == 0) {
      effS = 0;
      effB = nSelB/nTotB;
      Log() << kWARNING << "<ComputeEstimator> zero number of signal events" << Endl;
   }
   else if (nTotB == 0) {
      effB = 0;
      effS = nSelS/nTotS;
      Log() << kWARNING << "<ComputeEstimator> zero number of background events" << Endl;
   }
   else {
      effS = nSelS/nTotS;
      effB = nSelB/nTotB;
   }

   // quick fix to prevent negative efficiencies
   if (effS < 0) {
      effS = 0;
      if (!fNegEffWarning) Log() << kWARNING << "Negative signal efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
      fNegEffWarning = kTRUE;
   }
   if (effB < 0) {
      effB = 0;
      if (!fNegEffWarning) Log() << kWARNING << "Negative background efficiency found and set to 0. This is probably due to many events with negative weights in a certain cut-region." << Endl;
      fNegEffWarning = kTRUE;
   }
}
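// The efficiencies above are ratios of sums of event weights: the numerator is the summed weight
// of training events found inside the cut box by the BinarySearchTree volume search, the
// denominator the total summed weight of the corresponding class.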
void TMVA::MethodCuts::CreateVariablePDFs( void )
{
   // create histogram and PDF containers
   fVarHistS        = new std::vector<TH1*>( GetNvar() );
   fVarHistB        = new std::vector<TH1*>( GetNvar() );
   fVarHistS_smooth = new std::vector<TH1*>( GetNvar() );
   fVarHistB_smooth = new std::vector<TH1*>( GetNvar() );
   fVarPdfS         = new std::vector<PDF*>( GetNvar() );
   fVarPdfB         = new std::vector<PDF*>( GetNvar() );

   Int_t nsmooth = 0;

   // get the global min and max values over all events and variables
   Double_t minVal = DBL_MAX;
   Double_t maxVal = -DBL_MAX;
   for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
      const Event *ev = GetEvent(ievt);
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
         Float_t val = ev->GetValue(ivar);
         if (val < minVal) minVal = val;
         if (val > maxVal) maxVal = val;
      }
   }
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {

      // ---- signal histogram
      TString histTitle = (*fInputVars)[ivar] + " signal training";
      TString histName  = (*fInputVars)[ivar] + "_sig";

      (*fVarHistS)[ivar] = new TH1F(histName.Data(), histTitle.Data(), fNbins, minVal, maxVal );

      // ---- background histogram
      histTitle = (*fInputVars)[ivar] + " background training";
      histName  = (*fInputVars)[ivar] + "_bgd";

      (*fVarHistB)[ivar] = new TH1F(histName.Data(), histTitle.Data(), fNbins, minVal, maxVal );

      // fill the histograms
      for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
         const Event *ev = GetEvent(ievt);
         Float_t val = ev->GetValue(ivar);
         if (DataInfo().IsSignal(ev)) {
            (*fVarHistS)[ivar]->Fill( val );
         }
         else {
            (*fVarHistB)[ivar]->Fill( val );
         }
      }

      // smoothed signal histogram
      (*fVarHistS_smooth)[ivar] = (TH1F*)(*fVarHistS)[ivar]->Clone();
      histTitle  = (*fInputVars)[ivar] + " signal training smoothed ";
      histTitle += nsmooth;
      histTitle += " times";
      histName   = (*fInputVars)[ivar] + "_sig_smooth";
      (*fVarHistS_smooth)[ivar]->SetName(histName);
      (*fVarHistS_smooth)[ivar]->SetTitle(histTitle);

      (*fVarHistS_smooth)[ivar]->Smooth(nsmooth);

      // smoothed background histogram
      (*fVarHistB_smooth)[ivar] = (TH1F*)(*fVarHistB)[ivar]->Clone();
      histTitle  = (*fInputVars)[ivar] + " background training smoothed ";
      histTitle += nsmooth;
      histTitle += " times";
      histName   = (*fInputVars)[ivar] + "_bgd_smooth";
      (*fVarHistB_smooth)[ivar]->SetName(histName);
      (*fVarHistB_smooth)[ivar]->SetTitle(histTitle);

      (*fVarHistB_smooth)[ivar]->Smooth(nsmooth);

      // create PDFs from the smoothed histograms
      (*fVarPdfS)[ivar] = new PDF( TString(GetName()) + " PDF Var Sig " + GetInputVar( ivar ), (*fVarHistS_smooth)[ivar], PDF::kSpline2 );
      (*fVarPdfB)[ivar] = new PDF( TString(GetName()) + " PDF Var Bkg " + GetInputVar( ivar ), (*fVarHistB_smooth)[ivar], PDF::kSpline2 );
   }
}
void TMVA::MethodCuts::ReadWeightsFromStream( std::istream& istr )
{
   TString dummy;
   UInt_t  dummyInt;

   // first the dimensions
   istr >> dummy >> dummy;
   istr >> dummy >> fNbins;

   // number of variables (the other tokens on this line are not needed here)
   istr >> dummy >> dummy >> dummy >> dummy >> dummy >> dummy >> dummyInt >> dummy;

   // sanity check
   if (dummyInt != Data()->GetNVariables()) {
      Log() << kFATAL << "<ReadWeightsFromStream> fatal error: mismatch "
            << "in number of variables: " << dummyInt << " != " << Data()->GetNVariables() << Endl;
   }

   // print some information
   if (fFitMethod == kUseMonteCarlo) {
      Log() << kWARNING << "Read cuts optimised using sample of MC events" << Endl;
   }
   else if (fFitMethod == kUseMonteCarloEvents) {
      Log() << kWARNING << "Read cuts optimised using Monte-Carlo-Event sampling" << Endl;
   }
   else if (fFitMethod == kUseGeneticAlgorithm) {
      Log() << kINFO << "Read cuts optimised using Genetic Algorithm" << Endl;
   }
   else if (fFitMethod == kUseSimulatedAnnealing) {
      Log() << kINFO << "Read cuts optimised using Simulated Annealing algorithm" << Endl;
   }
   else if (fFitMethod == kUseEventScan) {
      Log() << kINFO << "Read cuts optimised using Full Event Scan" << Endl;
   }
   else {
      Log() << kWARNING << "unknown method: " << fFitMethod << Endl;
   }
   Log() << kINFO << "in " << fNbins << " signal efficiency bins and for " << GetNvar() << " variables" << Endl;

   // skip the header lines
   char buffer[200];
   istr.getline(buffer,200);
   istr.getline(buffer,200);

   // read the cut grid
   Int_t   tmpbin;
   Float_t tmpeffS, tmpeffB;
   if (fEffBvsSLocal != 0) delete fEffBvsSLocal;
   fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
                             TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
   fEffBvsSLocal->SetDirectory(0);

   for (Int_t ibin=0; ibin<fNbins; ibin++) {
      istr >> tmpbin >> tmpeffS >> tmpeffB;
      fEffBvsSLocal->SetBinContent( ibin+1, tmpeffB );

      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
         istr >> fCutMin[ivar][ibin] >> fCutMax[ivar][ibin];
      }
   }

   fEffSMin = fEffBvsSLocal->GetBinCenter(1);
   fEffSMax = fEffBvsSLocal->GetBinCenter(fNbins);
}
void TMVA::MethodCuts::AddWeightsXMLTo( void* parent ) const
{
   std::vector<Double_t> cutsMin;
   std::vector<Double_t> cutsMax;

   void* wght = gTools().AddChild(parent, "Weights");
   gTools().AddAttr( wght, "OptimisationMethod", (Int_t)fEffMethod );
   gTools().AddAttr( wght, "FitMethod",          (Int_t)fFitMethod );
   gTools().AddAttr( wght, "nbins",              fNbins );
   gTools().AddComment( wght, Form( "Below are the optimised cuts for %i variables: Format: ibin(hist) effS effB cutMin[ivar=0] cutMax[ivar=0] ... cutMin[ivar=n-1] cutMax[ivar=n-1]", GetNvar() ) );

   for (Int_t ibin=0; ibin<fNbins; ibin++) {
      Double_t effS     = fEffBvsSLocal->GetBinCenter ( ibin + 1 );
      Double_t trueEffS = GetCuts( effS, cutsMin, cutsMax );
      if (TMath::Abs(trueEffS) < 1e-10) trueEffS = 0;

      void* binxml = gTools().AddChild( wght, "Bin" );
      gTools().AddAttr( binxml, "ibin", ibin+1   );
      gTools().AddAttr( binxml, "effS", trueEffS );
      gTools().AddAttr( binxml, "effB", fEffBvsSLocal->GetBinContent( ibin + 1 ) );
      void* cutsxml = gTools().AddChild( binxml, "Cuts" );
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
         gTools().AddAttr( cutsxml, Form( "cutMin_%i", ivar ), cutsMin[ivar] );
         gTools().AddAttr( cutsxml, Form( "cutMax_%i", ivar ), cutsMax[ivar] );
      }
   }
}
void TMVA::MethodCuts::ReadWeightsFromXML( void* wghtnode )
{
   // delete the old cut tables
   for (UInt_t i=0; i<GetNvar(); i++) {
      if (fCutMin[i] != 0) delete [] fCutMin[i];
      if (fCutMax[i] != 0) delete [] fCutMax[i];
   }
   if (fCutMin != 0) delete [] fCutMin;
   if (fCutMax != 0) delete [] fCutMax;

   Int_t tmpEffMethod, tmpFitMethod;
   gTools().ReadAttr( wghtnode, "OptimisationMethod", tmpEffMethod );
   gTools().ReadAttr( wghtnode, "FitMethod",          tmpFitMethod );
   gTools().ReadAttr( wghtnode, "nbins",              fNbins       );

   fEffMethod = (EEffMethod)tmpEffMethod;
   fFitMethod = (EFitMethodType)tmpFitMethod;

   // print some information
   if (fFitMethod == kUseMonteCarlo) {
      Log() << kINFO << "Read cuts optimised using sample of MC events" << Endl;
   }
   else if (fFitMethod == kUseMonteCarloEvents) {
      Log() << kINFO << "Read cuts optimised using Monte-Carlo-Event sampling" << Endl;
   }
   else if (fFitMethod == kUseGeneticAlgorithm) {
      Log() << kINFO << "Read cuts optimised using Genetic Algorithm" << Endl;
   }
   else if (fFitMethod == kUseSimulatedAnnealing) {
      Log() << kINFO << "Read cuts optimised using Simulated Annealing algorithm" << Endl;
   }
   else if (fFitMethod == kUseEventScan) {
      Log() << kINFO << "Read cuts optimised using Full Event Scan" << Endl;
   }
   else {
      Log() << kWARNING << "unknown method: " << fFitMethod << Endl;
   }
   Log() << kINFO << "Reading " << fNbins << " signal efficiency bins for " << GetNvar() << " variables" << Endl;

   delete fEffBvsSLocal;
   fEffBvsSLocal = new TH1F( GetTestvarName() + "_effBvsSLocal",
                             TString(GetName()) + " efficiency of B vs S", fNbins, 0.0, 1.0 );
   fEffBvsSLocal->SetDirectory(0);
   for (Int_t ibin=1; ibin<=fNbins; ibin++) fEffBvsSLocal->SetBinContent( ibin, -0.1 );

   fCutMin = new Double_t*[GetNvar()];
   fCutMax = new Double_t*[GetNvar()];
   for (UInt_t i=0; i<GetNvar(); i++) {
      fCutMin[i] = new Double_t[fNbins];
      fCutMax[i] = new Double_t[fNbins];
   }

   // read efficiencies and cuts
   Int_t   tmpbin;
   Float_t tmpeffS, tmpeffB;
   void* ch = gTools().GetChild(wghtnode, "Bin");
   while (ch) {
      gTools().ReadAttr( ch, "ibin", tmpbin  );
      gTools().ReadAttr( ch, "effS", tmpeffS );
      gTools().ReadAttr( ch, "effB", tmpeffB );

      // sanity check
      if (tmpbin-1 >= fNbins || tmpbin-1 < 0) {
         Log() << kFATAL << "Mismatch in bins: " << tmpbin-1 << " >= " << fNbins << Endl;
      }

      fEffBvsSLocal->SetBinContent( tmpbin, tmpeffB );
      void* ct = gTools().GetChild(ch);
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
         gTools().ReadAttr( ct, Form( "cutMin_%i", ivar ), fCutMin[ivar][tmpbin-1] );
         gTools().ReadAttr( ct, Form( "cutMax_%i", ivar ), fCutMax[ivar][tmpbin-1] );
      }
      ch = gTools().GetNextChild(ch, "Bin");
   }
}
void TMVA::MethodCuts::WriteMonitoringHistosToFile( void ) const
{
   Log() << kINFO << "Write monitoring histograms to file: " << BaseDir()->GetPath() << Endl;

   fEffBvsSLocal->Write();

   if (fEffMethod == kUsePDFs) {
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
         (*fVarHistS)[ivar]->Write();
         (*fVarHistB)[ivar]->Write();
         (*fVarHistS_smooth)[ivar]->Write();
         (*fVarHistB_smooth)[ivar]->Write();
         (*fVarPdfS)[ivar]->GetPDFHist()->Write();
         (*fVarPdfB)[ivar]->GetPDFHist()->Write();
      }
   }
}
Double_t TMVA::MethodCuts::GetTrainingEfficiency( const TString& theString )
{
   // parse the string and cut off "Efficiency:"
   TList* list = gTools().ParseFormatLine( theString );

   // sanity check
   if (list->GetSize() != 2) {
      Log() << kFATAL << "<GetTrainingEfficiency> wrong number of arguments"
            << " in string: " << theString
            << " | required format, e.g., Efficiency:0.05" << Endl;
   }

   Results* results = Data()->GetResults(GetMethodName(), Types::kTesting, GetAnalysisType());

   // reference background efficiency at which the signal efficiency is returned
   Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );

   // first round ? --> create histograms
   if (results->GetHist("EFF_BVSS_TR")==0) {

      if (fBinaryTreeS != 0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
      if (fBinaryTreeB != 0) { delete fBinaryTreeB; fBinaryTreeB = 0; }

      fBinaryTreeS = new BinarySearchTree();
      fBinaryTreeS->Fill( GetEventCollection(Types::kTraining), fSignalClass );
      fBinaryTreeB = new BinarySearchTree();
      fBinaryTreeB->Fill( GetEventCollection(Types::kTraining), fBackgroundClass );

      // create efficiency and rejection curves: background versus signal
      TH1* eff_bvss_tr = new TH1F( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_bvss_tr->SetBinContent( ibin, -0.1 );
      TH1* rej_bvss_tr = new TH1F( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) rej_bvss_tr->SetBinContent( ibin, 0. );
      results->Store(eff_bvss_tr, "EFF_BVSS_TR");
      results->Store(rej_bvss_tr, "REJ_BVSS_TR");

      // fill the background-vs-signal efficiency plot from the optimised cuts
      Double_t* tmpCutMin = new Double_t[GetNvar()];
      Double_t* tmpCutMax = new Double_t[GetNvar()];
      Int_t nFailedBins=0;
      for (Int_t bini=1; bini<=fNbins; bini++) {
         for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
            tmpCutMin[ivar] = fCutMin[ivar][bini-1];
            tmpCutMax[ivar] = fCutMax[ivar][bini-1];
         }
         // compute the efficiencies for the cuts of this bin
         Double_t effS, effB;
         this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB );

         // check that the obtained signal efficiency corresponds to this bin
         Int_t effBin = eff_bvss_tr->GetXaxis()->FindBin(effS);
         if (effBin != bini) {
            Log() << kVERBOSE << "unable to fill efficiency bin " << bini << " " << effBin << Endl;
            nFailedBins++;
         }
         else {
            eff_bvss_tr->SetBinContent( bini, effB     );
            rej_bvss_tr->SetBinContent( bini, 1.0-effB );
         }
      }
      if (nFailedBins>0) Log() << kWARNING << " unable to fill " << nFailedBins << " efficiency bins " << Endl;

      delete [] tmpCutMin;
      delete [] tmpCutMax;

      // create spline for the efficiency curve
      fSplTrainEffBvsS = new TSpline1( "trainEffBvsS", new TGraph( eff_bvss_tr ) );
   }

   // must exist...
   if (NULL == fSplTrainEffBvsS) return 0.0;

   // now find the signal efficiency that corresponds to the required background efficiency
   Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
   Int_t    nbins_ = 1000;

   for (Int_t bini=1; bini<=nbins_; bini++) {

      // get corresponding signal and background efficiencies
      effS = (bini - 0.5)/Float_t(nbins_);
      effB = fSplTrainEffBvsS->Eval( effS );

      // stop when the background efficiency crosses the reference value
      if ((effB - effBref)*(effB_ - effBref) < 0) break;
      effS_ = effS;
      effB_ = effB;
   }

   return 0.5*(effS + effS_);
}
Double_t TMVA::MethodCuts::GetEfficiency( const TString& theString, Types::ETreeType type, Double_t& effSerr )
{
   Data()->SetCurrentType(type);

   Results* results = Data()->GetResults( GetMethodName(), Types::kTesting, GetAnalysisType() );

   TList* list = gTools().ParseFormatLine( theString, ":" );

   if (list->GetSize() > 2) {
      Log() << kFATAL << "<GetEfficiency> wrong number of arguments"
            << " in string: " << theString
            << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
   }

   Bool_t computeArea = (list->GetSize() < 2);

   Float_t effBref = (computeArea ? 1. : atof( ((TObjString*)list->At(1))->GetString() ));
   // first round ? --> create histograms
   if (results->GetHist("MVA_EFF_BvsS")==0) {

      if (fBinaryTreeS!=0) { delete fBinaryTreeS; fBinaryTreeS = 0; }
      if (fBinaryTreeB!=0) { delete fBinaryTreeB; fBinaryTreeB = 0; }

      fBinaryTreeS = new BinarySearchTree();
      fBinaryTreeS->Fill( GetEventCollection(Types::kTesting), fSignalClass );
      fBinaryTreeB = new BinarySearchTree();
      fBinaryTreeB->Fill( GetEventCollection(Types::kTesting), fBackgroundClass );

      // create efficiency and rejection curves: background versus signal
      TH1* eff_BvsS = new TH1F( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_BvsS->SetBinContent( ibin, -0.1 );
      TH1* rej_BvsS = new TH1F( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) rej_BvsS->SetBinContent( ibin, 0.0 );
      results->Store(eff_BvsS, "MVA_EFF_BvsS");
      results->Store(rej_BvsS);

      Double_t xmin = 0.;
      Double_t xmax = 1.000001;

      TH1* eff_s = new TH1F( GetTestvarName() + "_effS", GetTestvarName() + " (signal)",     fNbins, xmin, xmax );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_s->SetBinContent( ibin, -0.1 );
      TH1* eff_b = new TH1F( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbins, xmin, xmax );
      for (Int_t ibin=1; ibin<=fNbins; ibin++) eff_b->SetBinContent( ibin, -0.1 );
      results->Store(eff_s, "MVA_S");
      results->Store(eff_b, "MVA_B");
      Double_t* tmpCutMin = new Double_t[GetNvar()];
      Double_t* tmpCutMax = new Double_t[GetNvar()];
      TGraph*   tmpBvsS   = new TGraph(fNbins+1);
      tmpBvsS->SetPoint(0, 0., 0.);

      for (Int_t bini=1; bini<=fNbins; bini++) {
         for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
            tmpCutMin[ivar] = fCutMin[ivar][bini-1];
            tmpCutMax[ivar] = fCutMax[ivar][bini-1];
         }
         Double_t effS, effB;
         this->GetEffsfromSelection( &tmpCutMin[0], &tmpCutMax[0], effS, effB );
         tmpBvsS->SetPoint(bini, effS, effB);

         eff_s->SetBinContent(bini, effS);
         eff_b->SetBinContent(bini, effB);
      }
      tmpBvsS->SetPoint(fNbins+1, 1., 1.);

      delete [] tmpCutMin;
      delete [] tmpCutMax;

      fSpleffBvsS = new TSpline1( "effBvsS", tmpBvsS );
      for (Int_t bini=1; bini<=fNbins; bini++) {
         Double_t effS = (bini - 0.5)/Float_t(fNbins);
         Double_t effB = fSpleffBvsS->Eval( effS );
         eff_BvsS->SetBinContent( bini, effB );
         rej_BvsS->SetBinContent( bini, 1.0-effB );
      }
   }
   if (NULL == fSpleffBvsS) return 0.0;

   // now find the signal efficiency that corresponds to the required background efficiency
   Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
   Int_t    nbins_ = 1000;

   if (computeArea) {

      // compute the area of the background-rejection versus signal-efficiency curve
      Double_t integral = 0;
      for (Int_t bini=1; bini<=nbins_; bini++) {

         // get corresponding signal and background efficiencies
         effS = (bini - 0.5)/Float_t(nbins_);
         effB = fSpleffBvsS->Eval( effS );
         integral += (1.0 - effB);
      }
      integral /= nbins_;

      return integral;
   }

   // loop over efficiency bins until the background efficiency matches the requirement
   for (Int_t bini=1; bini<=nbins_; bini++) {

      // get corresponding signal and background efficiencies
      effS = (bini - 0.5)/Float_t(nbins_);
      effB = fSpleffBvsS->Eval( effS );

      // stop when the background efficiency crosses the reference value
      if ((effB - effBref)*(effB_ - effBref) < 0) break;
      effS_ = effS;
      effB_ = effB;
   }

   // take the mean between the bin above and the bin below
   effS = 0.5*(effS + effS_);

   if (Data()->GetNEvtSigTest() > 0)
      effSerr = TMath::Sqrt( effS*(1.0 - effS)/Double_t(Data()->GetNEvtSigTest()) );

   return effS;
}
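// When called with an empty efficiency argument (computeArea == kTRUE), the method returns the
// area under the background-rejection versus signal-efficiency curve, i.e. the average of
// (1 - effB) over the sampled signal efficiencies; otherwise it returns the signal efficiency
// at which the background efficiency crosses the requested reference value.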
void TMVA::MethodCuts::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   fout << "   // not implemented for class: \"" << className << "\"" << std::endl;
   fout << "};" << std::endl;
}
void TMVA::MethodCuts::GetHelpMessage() const
{
   TString bold    = gConfig().WriteOptionsReference() ? "<b>" : "";
   TString resbold = gConfig().WriteOptionsReference() ? "</b>" : "";
   TString brk     = gConfig().WriteOptionsReference() ? "<br>" : "";
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << "The optimisation of rectangular cuts performed by TMVA maximises " << Endl;
   Log() << "the background rejection at given signal efficiency, and scans " << Endl;
   Log() << "over the full range of the latter quantity. Three optimisation" << Endl;
   Log() << "methods are optional: Monte Carlo sampling (MC), a Genetic" << Endl;
   Log() << "Algorithm (GA), and Simulated Annealing (SA). GA and SA are" << Endl;
   Log() << "expected to perform best." << Endl;
   Log() << "The difficulty of finding the optimal cuts strongly increases with" << Endl;
   Log() << "the dimensionality (number of input variables) of the problem." << Endl;
   Log() << "This behavior is due to the non-uniqueness of the solution space." << Endl;
   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
   Log() << "If the dimensionality exceeds, say, 4 input variables, it is " << Endl;
   Log() << "advisable to scrutinize the separation power of the variables," << Endl;
   Log() << "and to remove the weakest ones. If some among the input variables" << Endl;
   Log() << "can be described by a single cut (e.g., because signal tends to be" << Endl;
   Log() << "larger than background), this can be indicated to MethodCuts via" << Endl;
   Log() << "the \"FSmart\" option (see option string). Choosing this option" << Endl;
   Log() << "reduces the number of requirements for the variable from 2 (min/max)" << Endl;
   Log() << "to a single one (TMVA finds out whether it is to be interpreted as" << Endl;
   Log() << "min or max)." << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << "" << Endl;
   Log() << bold << "Monte Carlo sampling:" << resbold << Endl;
   Log() << "" << Endl;
   Log() << "Apart from the \"FSmart\" option for the variables, the only way" << Endl;
   Log() << "to improve the MC sampling is to increase the sampling rate. This" << Endl;
   Log() << "is done via the configuration option \"MC_NRandCuts\". The execution" << Endl;
   Log() << "time scales linearly with the sampling rate." << Endl;
   Log() << "" << Endl;
   Log() << bold << "Genetic Algorithm:" << resbold << Endl;
   Log() << "" << Endl;
   Log() << "The algorithm terminates if no significant fitness increase has" << Endl;
   Log() << "been achieved within the last \"nsteps\" steps of the calculation." << Endl;
   Log() << "Wiggles in the ROC curve or constant background rejection of 1" << Endl;
   Log() << "indicate that the GA failed to always converge at the true maximum" << Endl;
   Log() << "fitness. In such a case, it is recommended to broaden the search " << Endl;
   Log() << "by increasing the population size (\"popSize\") and to give the GA " << Endl;
   Log() << "more time to find improvements by increasing the number of steps" << Endl;
   Log() << "(\"nsteps\")" << Endl;
   Log() << " -> increase \"popSize\" (at least >10 * number of variables)" << Endl;
   Log() << " -> increase \"nsteps\"" << Endl;
   Log() << "" << Endl;
   Log() << bold << "Simulated Annealing (SA) algorithm:" << resbold << Endl;
   Log() << "" << Endl;
   Log() << "\"Increasing Adaptive\" approach:" << Endl;
   Log() << "" << Endl;
   Log() << "The algorithm seeks local minima and explores their neighborhoods, while" << Endl;
   Log() << "changing the ambient temperature depending on the number of failures" << Endl;
   Log() << "in the previous steps. The performance can be improved by increasing" << Endl;
   Log() << "the number of iteration steps (\"MaxCalls\"), or by adjusting the" << Endl;
   Log() << "minimal temperature (\"MinTemperature\"). Manual adjustments of the" << Endl;
   Log() << "speed of the temperature increase (\"TemperatureScale\" and \"AdaptiveSpeed\")" << Endl;
   Log() << "to individual data sets should also help. Summary:" << brk << Endl;
   Log() << " -> increase \"MaxCalls\"" << brk << Endl;
   Log() << " -> adjust \"MinTemperature\"" << brk << Endl;
   Log() << " -> adjust \"TemperatureScale\"" << brk << Endl;
   Log() << " -> adjust \"AdaptiveSpeed\"" << Endl;
   Log() << "" << Endl;
   Log() << "\"Decreasing Adaptive\" approach:" << Endl;
   Log() << "" << Endl;
   Log() << "The algorithm calculates the initial temperature (based on the effect-" << Endl;
   Log() << "iveness of large steps) and the multiplier that ensures to reach the" << Endl;
   Log() << "minimal temperature with the requested number of iteration steps." << Endl;
   Log() << "The performance can be improved by adjusting the minimal temperature" << Endl;
   Log() << " (\"MinTemperature\") and by increasing number of steps (\"MaxCalls\"):" << brk << Endl;
   Log() << " -> increase \"MaxCalls\"" << brk << Endl;
   Log() << " -> adjust \"MinTemperature\"" << Endl;
   Log() << " " << Endl;
   Log() << "Other kernels:" << Endl;
   Log() << "" << Endl;
   Log() << "Alternative ways of counting the temperature change are implemented. " << Endl;
   Log() << "Each of them starts with the maximum temperature (\"MaxTemperature\")" << Endl;
   Log() << "and decreases while changing the temperature according to a given" << Endl;
   Log() << "prescription:" << brk << Endl;
   Log() << "CurrentTemperature =" << brk << Endl;
   Log() << " - Sqrt: InitialTemperature / Sqrt(StepNumber+2) * TemperatureScale" << brk << Endl;
   Log() << " - Log:  InitialTemperature / Log(StepNumber+2) * TemperatureScale" << brk << Endl;
   Log() << " - Homo: InitialTemperature / (StepNumber+2) * TemperatureScale" << brk << Endl;
   Log() << " - Sin:  (Sin(StepNumber / TemperatureScale) + 1) / (StepNumber + 1)*InitialTemperature + Eps" << brk << Endl;
   Log() << " - Geo:  CurrentTemperature * TemperatureScale" << Endl;
   Log() << "" << Endl;
   Log() << "Their performance can be improved by adjusting the initial temperature" << Endl;
   Log() << "(\"InitialTemperature\"), the number of iteration steps (\"MaxCalls\")," << Endl;
   Log() << "and the multiplier that scales the temperature decrease" << Endl;
   Log() << "(\"TemperatureScale\")" << brk << Endl;
   Log() << " -> increase \"MaxCalls\"" << brk << Endl;
   Log() << " -> adjust \"InitialTemperature\"" << brk << Endl;
   Log() << " -> adjust \"TemperatureScale\"" << brk << Endl;
   Log() << " -> adjust \"KernelTemperature\"" << Endl;
}