ClassImp(TMVA::OptimizeConfigParameters);
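
////////////////////////////////////////////////////////////////////////////////
/// Constructor: set up the optimisation of tuning parameters for the given
/// method. Regression is not (yet) supported and aborts with kFATAL.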
TMVA::OptimizeConfigParameters::OptimizeConfigParameters(MethodBase * const method, std::map<TString,TMVA::Interval*> tuneParameters, TString fomType, TString optimizationFitType)
   : fMethod(method),
     fTuneParameters(tuneParameters),
     fFOMType(fomType),
     fOptimizationFitType(optimizationFitType),
     fMvaSig(NULL),
     fMvaBkg(NULL),
     fMvaSigFineBin(NULL),
     fMvaBkgFineBin(NULL)
{
   std::string name = "OptimizeConfigParameters_";
   name += std::string(GetMethod()->GetName());
   fLogger = new MsgLogger(name);
   if (fMethod->DoRegression()){
      Log() << kFATAL << " ERROR: Sorry, Regression is not yet implemented for automatic parameter optimization"
            << " --> exit" << Endl;
   }

   Log() << kINFO << "Automatic optimisation of tuning parameters in "
         << GetMethod()->GetName() << " uses:" << Endl;

   std::map<TString,TMVA::Interval*>::iterator it;
   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it) {
      Log() << kINFO << it->first
            << " in range from: " << it->second->GetMin()
            << " to: " << it->second->GetMax()
            << " in : " << it->second->GetNbins() << " steps"
            << Endl;
   }
   Log() << kINFO << " using the options: " << fFOMType << " and " << fOptimizationFitType << Endl;
}
TMVA::OptimizeConfigParameters::~OptimizeConfigParameters()
{
   if(!GetMethod()->IsSilentFile()) GetMethod()->BaseDir()->cd();
   Int_t n=Int_t(fFOMvsIter.size());
   Float_t *x = new Float_t[n];
   Float_t *y = new Float_t[n];
   Float_t ymin=(Float_t)+999999999;
   Float_t ymax=(Float_t)-999999999;

   for (Int_t i=0;i<n;i++){
      x[i] = Float_t(i);
      y[i] = fFOMvsIter[i];
      if (ymin>y[i]) ymin=y[i];
      if (ymax<y[i]) ymax=y[i];
   }

   TH2D   *h = new TH2D(TString(GetMethod()->GetName())+"_FOMvsIterFrame","",2,0,n,2,ymin*0.95,ymax*1.05);
   h->SetXTitle("#iteration "+fOptimizationFitType);
   h->SetYTitle(fFOMType);
   TGraph *gFOMvsIter = new TGraph(n,x,y);
   gFOMvsIter->SetName((TString(GetMethod()->GetName())+"_FOMvsIter").Data());
   if(!GetMethod()->IsSilentFile()) gFOMvsIter->Write();
   if(!GetMethod()->IsSilentFile()) h->Write();
   delete [] x;
   delete [] y;
}
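
////////////////////////////////////////////////////////////////////////////////
/// Dispatch to the chosen optimisation strategy ("Scan", "FitGA" or "Minuit"),
/// log the resulting parameter values, and return them. A rough sketch of how
/// this is typically reached from user code (via the Factory; exact call
/// signature may differ between ROOT versions):
/// ~~~{.cpp}
/// // after booking methods and preparing the DataLoader:
/// factory->OptimizeAllMethods("ROCIntegral", "FitGA"); // FOM type, fit type
/// ~~~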
std::map<TString,Double_t> TMVA::OptimizeConfigParameters::optimize()
{
   if      (fOptimizationFitType == "Scan" ) this->optimizeScan();
   else if (fOptimizationFitType == "FitGA" || fOptimizationFitType == "Minuit" ) this->optimizeFit();
   else {
      Log() << kFATAL << "You have chosen as optimization type " << fOptimizationFitType
            << " that is not (yet) coded --> exit()" << Endl;
   }

   Log() << kINFO << "For " << GetMethod()->GetName() << " the optimized Parameters are: " << Endl;
   std::map<TString,Double_t>::iterator it;
   for (it=fTunedParameters.begin(); it!=fTunedParameters.end(); ++it){
      Log() << kINFO << it->first << " = " << it->second << Endl;
   }
   return fTunedParameters;
}
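
////////////////////////////////////////////////////////////////////////////////
/// Helper for scanning through all combinations in the parameter space:
/// decodes the flat combination index `val` into one index per parameter,
/// treating `base` as a mixed-radix number system (e.g. val=5 with base={2,3}
/// yields indices {1,2}).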
std::vector<int> TMVA::OptimizeConfigParameters::GetScanIndices( int val, std::vector<int> base ){
   std::vector<int> indices;
   for (UInt_t i=0; i< base.size(); i++){
      indices.push_back(val % base[i] );
      val = int( floor( float(val) / float(base[i]) ) );
   }
   return indices;
}
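
////////////////////////////////////////////////////////////////////////////////
/// Do the actual optimization via a simple scan: evaluate the figure of merit
/// for each point of the discrete parameter grid and remember the combination
/// that gave the best FOM.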
void TMVA::OptimizeConfigParameters::optimizeScan()
{
   Double_t bestFOM=-1000000, currentFOM;

   std::map<TString,Double_t> currentParameters;
   std::map<TString,TMVA::Interval*>::iterator it;

   // for the scan, start at the lower end of each interval and "move upwards"
   currentParameters.clear();
   fTunedParameters.clear();

   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
      currentParameters.insert(std::pair<TString,Double_t>(it->first,it->second->GetMin()));
      fTunedParameters.insert(std::pair<TString,Double_t>(it->first,it->second->GetMin()));
   }

   // the scan grid: one vector of discrete values per tuning parameter
   std::vector< std::vector<Double_t> > v;
   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
      std::vector<Double_t> tmp;
      for (Int_t k=0; k<it->second->GetNbins(); k++){
         tmp.push_back(it->second->GetElement(k));
      }
      v.push_back(tmp);
   }
   Int_t Ntot = 1;
   std::vector<int> Nindividual;
   for (UInt_t i=0; i<v.size(); i++) {
      Ntot *= v[i].size();
      Nindividual.push_back(v[i].size());
   }

   // loop over all combinations of the grid points
   for (int i=0; i<Ntot; i++){
      UInt_t index=0;
      std::vector<int> indices = GetScanIndices(i, Nindividual );
      for (it=fTuneParameters.begin(), index=0; index< indices.size(); ++index, ++it){
         currentParameters[it->first] = v[index][indices[index]];
      }
      Log() << kINFO << "--------------------------" << Endl;
      Log() << kINFO << "Settings being evaluated:" << Endl;
      for (std::map<TString,Double_t>::iterator it_print=currentParameters.begin();
           it_print!=currentParameters.end(); ++it_print){
         Log() << kINFO << "  " << it_print->first << " = " << it_print->second << Endl;
      }

      GetMethod()->Reset();
      GetMethod()->SetTuneParameters(currentParameters);
      // now do the training with the current parameters
      if(!GetMethod()->IsSilentFile()) GetMethod()->BaseDir()->cd();
      if (i==0) GetMethod()->GetTransformationHandler().CalcTransformations(
                                             GetMethod()->Data()->GetEventCollection());
      Event::SetIsTraining(kTRUE);
      GetMethod()->Train();
      Event::SetIsTraining(kFALSE);
      currentFOM = GetFOM();
      Log() << kINFO << "FOM was found : " << currentFOM << "; current best is " << bestFOM << Endl;

      if (currentFOM > bestFOM) {
         bestFOM = currentFOM;
         for (std::map<TString,Double_t>::iterator iter=currentParameters.begin();
              iter != currentParameters.end(); ++iter){
            fTunedParameters[iter->first]=iter->second;
         }
      }
   }

   GetMethod()->Reset();
   GetMethod()->SetTuneParameters(fTunedParameters);
}
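
////////////////////////////////////////////////////////////////////////////////
/// Optimize the tuning parameters with a fitter (Minuit or a genetic
/// algorithm) that minimises the estimator returned by EstimatorFunction().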
void TMVA::OptimizeConfigParameters::optimizeFit()
{
   // ranges (intervals) in which the fit varies the parameters
   std::vector<TMVA::Interval*> ranges;
   std::map<TString, TMVA::Interval*>::iterator it;
   std::vector<Double_t> pars; // current (starting) fit parameters

   for (it=fTuneParameters.begin(); it != fTuneParameters.end(); ++it){
      ranges.push_back( new TMVA::Interval(*(it->second)) );
      // keep the order in "pars" identical to the iteration order of fTuneParameters
      pars.push_back( (it->second)->GetMean() );
   }

   // allow for transformations on the input variables (i.e. norm)
   GetMethod()->GetTransformationHandler().CalcTransformations(GetMethod()->Data()->GetEventCollection());

   // create the fitter
   FitterBase* fitter = NULL;

   if ( fOptimizationFitType == "Minuit" ) {
      TString opt="FitStrategy=0:UseImprove=False:UseMinos=False:Tolerance=100";
      if (!TMVA::gConfig().IsSilent() ) opt += TString(":PrintLevel=0");

      fitter = new MinuitFitter( *this,
                                 "FitterMinuit_BDTOptimize",
                                 ranges, opt );
   } else if ( fOptimizationFitType == "FitGA" ) {
      TString opt="PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
      fitter = new GeneticFitter( *this,
                                  "FitterGA_BDTOptimize",
                                  ranges, opt );
   } else {
      Log() << kWARNING << " you did not specify a valid OptimizationFitType "
            << " will use the default (FitGA) " << Endl;
      TString opt="PopSize=20:Steps=30:Cycles=3:ConvCrit=0.01:SaveBestCycle=5";
      fitter = new GeneticFitter( *this,
                                  "FitterGA_BDTOptimize",
                                  ranges, opt );
   }

   fitter->CheckForUnusedOptions();

   // perform the fit
   fitter->Run(pars);

   // clean up
   for (UInt_t ipar=0; ipar<ranges.size(); ipar++) delete ranges[ipar];

   GetMethod()->Reset();

   fTunedParameters.clear();
   Int_t jcount=0;
   for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
      fTunedParameters.insert(std::pair<TString,Double_t>(it->first,pars[jcount++]));
   }

   GetMethod()->SetTuneParameters(fTunedParameters);
}
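
////////////////////////////////////////////////////////////////////////////////
/// Interface for the fitters: train the method with the given parameter
/// values and return the negative figure of merit (fitters minimise). Results
/// are cached per parameter combination to avoid retraining.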
Double_t TMVA::OptimizeConfigParameters::EstimatorFunction( std::vector<Double_t> & pars)
{
   std::map< std::vector<Double_t>, Double_t>::const_iterator iter;
   iter = fAlreadyTrainedParCombination.find(pars);

   if (iter != fAlreadyTrainedParCombination.end()) {
      // this parameter combination was already trained: return the cached estimator
      return iter->second;
   } else {
      std::map<TString,Double_t> currentParameters;
      Int_t icount = 0; // maps "pars" onto fTuneParameters; never change this order!
      std::map<TString, TMVA::Interval*>::iterator it;
      for (it=fTuneParameters.begin(); it!=fTuneParameters.end(); ++it){
         currentParameters[it->first] = pars[icount++];
      }
      GetMethod()->Reset();
      GetMethod()->SetTuneParameters(currentParameters);
      if(!GetMethod()->IsSilentFile()) GetMethod()->BaseDir()->cd();

      GetMethod()->GetTransformationHandler().
         CalcTransformations(GetMethod()->Data()->GetEventCollection());

      Event::SetIsTraining(kTRUE);
      GetMethod()->Train();
      Event::SetIsTraining(kFALSE);

      Double_t currentFOM = GetFOM();

      // fitters minimise, hence cache and return the negative FOM
      fAlreadyTrainedParCombination.insert(std::make_pair(pars,-currentFOM));
      return -currentFOM;
   }
}
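
////////////////////////////////////////////////////////////////////////////////
/// Return the figure of merit (FOM) used in the parameter optimization
/// process. FOM types ending in digits, e.g. "SigEffAtBkgEff001", encode the
/// target efficiency in the trailing digits (001 -> 0.01).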
Double_t TMVA::OptimizeConfigParameters::GetFOM()
{
   // helper to parse the percentage out of FOM strings like "SigEffAtBkgEff001"
   auto parsePercent = [this](TString input) -> Double_t {
      TString percent = TString(input(14, input.Sizeof()));
      if (!percent.CountChar('.')) percent.Insert(1, ".");

      if (percent.IsFloat()) {
         return percent.Atof();
      } else {
         Log() << kFATAL << " ERROR, " << percent << " in " << fFOMType
               << " is not a valid floating point number" << Endl;
         return 0.; // cannot be reached, kFATAL aborts
      }
   };

   Double_t fom = 0;
   if (fMethod->DoRegression()){
      std::cout << " ERROR: Sorry, Regression is not yet implemented for automatic parameter optimisation"
                << " --> exit" << std::endl;
      std::exit(1);
   }

   if      (fFOMType == "Separation")               fom = GetSeparation();
   else if (fFOMType == "ROCIntegral")              fom = GetROCIntegral();
   else if (fFOMType.BeginsWith("SigEffAtBkgEff0")) fom = GetSigEffAtBkgEff(parsePercent(fFOMType));
   else if (fFOMType.BeginsWith("BkgRejAtSigEff0")) fom = GetBkgRejAtSigEff(parsePercent(fFOMType));
   else if (fFOMType.BeginsWith("BkgEffAtSigEff0")) fom = GetBkgEffAtSigEff(parsePercent(fFOMType));
   else {
      Log() << kFATAL << " ERROR, you've specified as Figure of Merit in the "
            << " parameter optimisation " << fFOMType << " which has not"
            << " been implemented yet!! ---> exit " << Endl;
   }

   fFOMvsIter.push_back(fom);
   return fom;
}
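
////////////////////////////////////////////////////////////////////////////////
/// Fill the private histograms with the MVA output distributions for signal
/// and background from the test sample, in coarse binning (for spline fits)
/// and fine binning (for cumulative integrals).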
void TMVA::OptimizeConfigParameters::GetMVADists()
{
   if (fMvaSig) fMvaSig->Delete();
   if (fMvaBkg) fMvaBkg->Delete();
   if (fMvaSigFineBin) fMvaSigFineBin->Delete();
   if (fMvaBkgFineBin) fMvaBkgFineBin->Delete();

   fMvaSig        = new TH1D("fMvaSig","",100,-1.5,1.5);       // used for spline fit
   fMvaBkg        = new TH1D("fMvaBkg","",100,-1.5,1.5);       // used for spline fit
   fMvaSigFineBin = new TH1D("fMvaSigFineBin","",100000,-1.5,1.5);
   fMvaBkgFineBin = new TH1D("fMvaBkgFineBin","",100000,-1.5,1.5);

   const std::vector<Event*> events=fMethod->Data()->GetEventCollection(Types::kTesting);

   UInt_t signalClassNr = fMethod->DataInfo().GetClassInfo("Signal")->GetNumber();

   for (UInt_t iev=0; iev < events.size() ; iev++){
      if (events[iev]->GetClass() == signalClassNr) {
         fMvaSig->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
         fMvaSigFineBin->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
      } else {
         fMvaBkg->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
         fMvaBkgFineBin->Fill(fMethod->GetMvaValue(events[iev]),events[iev]->GetWeight());
      }
   }
}
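
////////////////////////////////////////////////////////////////////////////////
/// Return the separation between the signal and background MVA output
/// distributions, computed from spline-fitted PDFs.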
Double_t TMVA::OptimizeConfigParameters::GetSeparation()
{
   GetMVADists();
   if (1){
      PDF *splS = new PDF( " PDF Sig", fMvaSig, PDF::kSpline2 );
      PDF *splB = new PDF( " PDF Bkg", fMvaBkg, PDF::kSpline2 );
      return gTools().GetSeparation(*splS,*splB);
   } else {
      // the histogram-based (non-PDF) calculation is kept disabled
      std::cout << "Separation calculation via histograms (not PDFs) still seems to give strange results!! Don't do that, check!!" << std::endl;
      return gTools().GetSeparation(fMvaSigFineBin,fMvaBkgFineBin);
   }
}
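
////////////////////////////////////////////////////////////////////////////////
/// Calculate the ROC integral from the finely binned MVA distributions,
/// i.e. the background rejection averaged over the normalised signal
/// distribution.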
Double_t TMVA::OptimizeConfigParameters::GetROCIntegral()
{
   GetMVADists();

   Double_t integral = 0;
   if (0){
      // PDF-based calculation (currently disabled)
      PDF *pdfS = new PDF( " PDF Sig", fMvaSig, PDF::kSpline2 );
      PDF *pdfB = new PDF( " PDF Bkg", fMvaBkg, PDF::kSpline2 );

      Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
      Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());

      UInt_t   nsteps = 1000;
      Double_t step = (xmax-xmin)/Double_t(nsteps);
      Double_t cut  = xmin;
      for (UInt_t i=0; i<nsteps; i++){
         integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
         cut += step;
      }
      integral *= step;
   } else {
      // sanity check: signal and background histograms must have identical binning
      if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
           (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
         std::cout << " Error in OptimizeConfigParameters GetROCIntegral, unequal histograms for sig and bkg.." << std::endl;
         std::exit(1);
      } else {
         Double_t *cumulator = fMvaBkgFineBin->GetIntegral(); // normalised background cumulative
         Int_t     nbins     = fMvaSigFineBin->GetNbinsX();
         // get the true signal integral (bins "start from one";
         // bin nbins+1 is the overflow bin)
         Double_t sigIntegral = 0;
         for (Int_t ibin=1; ibin<=nbins; ibin++){
            sigIntegral += fMvaSigFineBin->GetBinContent(ibin) * fMvaSigFineBin->GetBinWidth(ibin);
         }

         for (Int_t ibin=1; ibin<=nbins; ibin++){ // don't include under- and overflow bins
            integral += (cumulator[ibin]) * fMvaSigFineBin->GetBinContent(ibin)/sigIntegral * fMvaSigFineBin->GetBinWidth(ibin);
         }
      }
   }

   return integral;
}
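
////////////////////////////////////////////////////////////////////////////////
/// Calculate the signal efficiency obtained at the cut that yields the given
/// background efficiency.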
Double_t TMVA::OptimizeConfigParameters::GetSigEffAtBkgEff(Double_t bkgEff)
{
   GetMVADists();
   Double_t sigEff=0;

   // sanity check: signal and background histograms must have identical binning
   if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
        (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
      std::cout << " Error in OptimizeConfigParameters GetSigEffAt, unequal histograms for sig and bkg.." << std::endl;
      std::exit(1);
   } else {
      Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
      Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();

      Int_t nbins=fMvaBkgFineBin->GetNbinsX();
      Int_t ibin=0;

      // walk down from the high end of the MVA distribution until the chosen
      // background efficiency is reached
      while (bkgCumulator[nbins-ibin] > (1-bkgEff)) {
         sigEff = sigCumulator[nbins]-sigCumulator[nbins-ibin];
         ibin++;
      }
   }
   return sigEff;
}
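
////////////////////////////////////////////////////////////////////////////////
/// Calculate the background efficiency obtained at the cut that yields the
/// given signal efficiency.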
Double_t TMVA::OptimizeConfigParameters::GetBkgEffAtSigEff(Double_t sigEff)
{
   GetMVADists();
   Double_t bkgEff=0;

   // sanity check: signal and background histograms must have identical binning
   if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
        (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
      std::cout << " Error in OptimizeConfigParameters GetBkgEffAt, unequal histograms for sig and bkg.." << std::endl;
      std::exit(1);
   } else {
      Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
      Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();

      Int_t nbins=fMvaBkgFineBin->GetNbinsX();
      Int_t ibin=0;

      // walk down from the high end until the chosen signal efficiency is reached
      while ( sigCumulator[nbins]-sigCumulator[nbins-ibin] < sigEff) {
         bkgEff = bkgCumulator[nbins]-bkgCumulator[nbins-ibin];
         ibin++;
      }
   }
   return bkgEff;
}
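
////////////////////////////////////////////////////////////////////////////////
/// Calculate the background rejection obtained at the cut that yields the
/// given signal efficiency.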
Double_t TMVA::OptimizeConfigParameters::GetBkgRejAtSigEff(Double_t sigEff)
{
   GetMVADists();
   Double_t bkgRej=0;

   // sanity check: signal and background histograms must have identical binning
   if ( (fMvaSigFineBin->GetXaxis()->GetXmin() != fMvaBkgFineBin->GetXaxis()->GetXmin()) ||
        (fMvaSigFineBin->GetNbinsX() != fMvaBkgFineBin->GetNbinsX()) ){
      std::cout << " Error in OptimizeConfigParameters GetBkgRejAt, unequal histograms for sig and bkg.." << std::endl;
      std::exit(1);
   } else {
      Double_t *bkgCumulator = fMvaBkgFineBin->GetIntegral();
      Double_t *sigCumulator = fMvaSigFineBin->GetIntegral();

      Int_t nbins=fMvaBkgFineBin->GetNbinsX();
      Int_t ibin=0;

      // walk down from the high end until the chosen signal efficiency is reached
      while ( sigCumulator[nbins]-sigCumulator[nbins-ibin] < sigEff) {
         bkgRej = bkgCumulator[nbins-ibin];
         ibin++;
      }
   }
   return bkgRej;
}