/// Default constructor.
/// Constructs fImportanceValues from the string "VariableImportance" and
/// leaves fImportanceHist null.
44 TMVA::VariableImportanceResult::VariableImportanceResult():fImportanceValues(
"VariableImportance"),
45 fImportanceHist(nullptr)
/// Copy constructor.
/// \param obj result object whose importance values and histogram handle are copied.
/// NOTE(review): only fImportanceValues and fImportanceHist are assigned here;
/// fType, which Print() reads, is not copied in these lines - confirm whether
/// that is intended.
52 TMVA::VariableImportanceResult::VariableImportanceResult(
const VariableImportanceResult &obj)
54 fImportanceValues = obj.fImportanceValues;
// Copies the handle, not the histogram contents; presumably both objects then
// refer to the same TH1F - confirm against the member's declared type.
55 fImportanceHist = obj.fImportanceHist;
/// Prints the stored importance values through a local MsgLogger.
/// A header line naming the evaluation mode (Short / All / Random, chosen
/// from fType) is logged at kINFO level, followed by fImportanceValues.Print().
60 void TMVA::VariableImportanceResult::Print()
const
// Turn logger output on and clear the global silent flag so the report is shown.
62 TMVA::MsgLogger::EnableOutput();
63 TMVA::gConfig().SetSilent(kFALSE);
// Local logger tagged "VariableImportance".
65 MsgLogger fLogger(
"VariableImportance");
// Mode-specific header.
66 if(fType==VIType::kShort)
68 fLogger<<kINFO<<
"Variable Importance Results (Short)"<<Endl;
69 }
else if(fType==VIType::kAll)
71 fLogger<<kINFO<<
"Variable Importance Results (All)"<<Endl;
73 fLogger<<kINFO<<
"Variable Importance Results (Random)"<<Endl;
// Dump the per-variable values.
76 fImportanceValues.Print();
// NOTE(review): the silent flag is set to kTRUE unconditionally rather than
// restored to whatever it was before this call - confirm this is intended.
77 TMVA::gConfig().SetSilent(kTRUE);
/// Draws the importance histogram on a newly allocated canvas.
/// \param name name given to the new TCanvas.
/// NOTE(review): fImportanceHist is dereferenced without a null check, and the
/// default constructor leaves it null - calling Draw() on a result that was
/// never filled looks unsafe; confirm callers guarantee a histogram exists.
82 TCanvas* TMVA::VariableImportanceResult::Draw(
const TString name)
const
// Canvas is allocated with raw new; ownership presumably passes to the
// caller / ROOT - confirm.
84 TCanvas *c=
new TCanvas(name.Data());
// Draw with an empty option string, then label both axes.
85 fImportanceHist->Draw(
"");
86 fImportanceHist->GetXaxis()->SetTitle(
" Variable Names ");
87 fImportanceHist->GetYaxis()->SetTitle(
" Importance (%) ");
/// Constructor.
/// Forwards the name "VariableImportance", the given dataloader and a null
/// third argument to the Envelope base class, and selects VIType::kShort as
/// the initial evaluation mode.
/// \param dataloader loader passed through to TMVA::Envelope.
94 TMVA::VariableImportance::VariableImportance(TMVA::DataLoader *dataloader):TMVA::Envelope(
"VariableImportance",dataloader,nullptr),fType(VIType::kShort)
// Private Factory used for all trainings in this class. The option string
// requests classification analysis with verbose output, ROC output, model
// persistence and the progress bar switched off ("!"-prefixed flags) and with
// Silent and Color switched on.
96 fClassifier=std::unique_ptr<Factory>(
new TMVA::Factory(
"VariableImportance",
"!V:!ROC:!ModelPersistence:Silent:Color:!DrawProgressBar:AnalysisType=Classification"));
/// Destructor.
101 TMVA::VariableImportance::~VariableImportance()
/// Runs the variable-importance evaluation selected by fType.
/// kShort dispatches to EvaluateImportanceShort(), kAll to
/// EvaluateImportanceAll(); the remaining path calls
/// EvaluateImportanceRandom() with nbits^2 seeds. Afterwards the mode is
/// recorded in fResults and "Evaluation done." is logged.
108 void TMVA::VariableImportance::Evaluate()
112 if(fType==VIType::kShort)
114 EvaluateImportanceShort();
115 }
else if(fType==VIType::kAll)
117 EvaluateImportanceAll();
// Random mode: the number of seeds is derived from the variable count.
119 UInt_t nbits=fDataLoader->GetDefaultDataSetInfo().GetNVariables();
// NOTE(review): this message is emitted at kERROR level although it reads
// like a warning, and its text contains typos ("less that", "varibales",
// "can to", "inconsisten"). The text is runtime output and is left untouched
// here; confirm the condition that guards it.
121 Log()<<kERROR<<
"Running variable importance with less that 10 varibales in Random mode "<<
122 "can to produce inconsisten results"<<Endl;
// pow() returns a double, which is converted to the UInt_t seeds parameter.
123 EvaluateImportanceRandom(pow(nbits,2));
// Remember which mode produced the results so Print() can label them.
125 fResults.fType = fType;
// Temporarily un-silence logging to report completion, then silence again.
126 TMVA::MsgLogger::EnableOutput();
127 TMVA::gConfig().SetSilent(kFALSE);
128 Log()<<kINFO<<
"Evaluation done."<<Endl;
129 TMVA::gConfig().SetSilent(kTRUE);
/// Accumulates 2^n for every n in [0, i); mathematically that total is 2^i - 1.
/// \param i number of powers of two to add.
/// \return presumably the accumulated sum - confirm.
134 ULong_t TMVA::VariableImportance::Sum(ULong_t i)
// NOTE(review): pow() works in double precision, so each term makes a
// double <-> ULong_t round trip; an integer shift would be exact for any i
// that fits in ULong_t.
137 for(ULong_t n=0;n<i;n++) sum+=pow(2,n);
/// Builds a histogram of relative variable importances in percent.
/// \param nbits       number of variables; one histogram bin is created per variable.
/// \param importances raw importance per variable; entries [0, nbits) are read.
/// \param varNames    variable names, used as bin labels; entries [0, nbits) are read.
/// \return presumably the newly allocated histogram - confirm. This file wraps
///         the result in a std::shared_ptr<TH1F>.
/// Side effect: modifies the global gStyle object.
143 TH1F* TMVA::VariableImportance::GetImportance(
const UInt_t nbits,std::vector<Float_t> &importances,std::vector<TString> &varNames)
// nbits bins covering [0, nbits), with an empty title.
145 TH1F *vihist =
new TH1F(
"vihist",
"", nbits, 0, nbits);
147 gStyle->SetOptStat(000000);
// Sum of all importances; each bin is expressed as a percentage of it.
// NOTE(review): there is no guard for a zero sum before the division below.
149 Float_t normalization = 0.0;
150 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
// NOTE(review): the first SetTitleXOffset call is immediately overwritten by
// the second, so only 1.2 takes effect; one of them may have been meant for
// the Y axis - confirm.
154 gStyle->SetTitleXOffset(0.4);
155 gStyle->SetTitleXOffset(1.2);
// Histogram bins are 1-based while the vectors are 0-based.
158 for (UInt_t i = 1; i < nbits + 1; i++) {
159 roc = 100.0 * importances[i - 1] / normalization;
160 vihist->GetXaxis()->SetBinLabel(i, varNames[i - 1].Data());
161 vihist->SetBinContent(i, roc);
// Label layout and bar appearance.
164 vihist->LabelsOption(
"v >",
"X");
165 vihist->SetBarWidth(0.97);
166 vihist->SetFillColor(TColor::GetColor(
"#006600"));
// X axis title and placement.
168 vihist->GetXaxis()->SetTitle(
" Variable Names ");
169 vihist->GetXaxis()->SetTitleSize(0.045);
170 vihist->GetXaxis()->CenterTitle();
171 vihist->GetXaxis()->SetTitleOffset(1.24);
// Y axis title and placement.
173 vihist->GetYaxis()->SetTitle(
" Importance (%)");
174 vihist->GetYaxis()->SetTitleSize(0.045);
175 vihist->GetYaxis()->CenterTitle();
176 vihist->GetYaxis()->SetTitleOffset(1.24);
// NOTE(review): the Y range is fixed to [-7, 50]; a variable carrying more
// than 50% of the total would fall outside the displayed range - confirm.
178 vihist->GetYaxis()->SetRangeUser(-7, 50);
// Detach the histogram from any ROOT directory.
179 vihist->SetDirectory(0);
/// "Short" variable-importance evaluation.
/// For each booked method a seed classifier is trained and its ROC integral
/// (SROC) recorded; then sub-seed classifiers are trained on reduced variable
/// sets and the ROC drop (SROC - SSROC) is credited to the variable whose bit
/// differs between seed and sub-seed. Results are stored in fResults as
/// percentages and as a histogram.
186 void TMVA::VariableImportance::EvaluateImportanceShort()
188 for (
auto &meth : fMethods) {
// Method description reused for every training below.
189 TString methodName = meth.GetValue<TString>(
"MethodName");
190 TString methodTitle = meth.GetValue<TString>(
"MethodTitle");
191 TString methodOptions = meth.GetValue<TString>(
"MethodOptions");
// One bit per input variable.
196 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
197 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
// Sum(nbits) adds 2^n for n in [0, nbits), i.e. a mask with the nbits lowest bits set.
199 ULong_t range = Sum(nbits);
// Per-variable importance accumulator, indexed like varNames.
202 std::vector<Float_t> importances(nbits);
203 for (UInt_t i = 0; i < nbits; i++)
// Bit pattern of the seed value x.
210 std::bitset<NBITS> xbitset(x);
212 Log() << kFATAL <<
"Error: need at least one variable.";
// Seed DataLoader, named after the seed's bit string.
// NOTE(review): allocated with raw new and no matching delete appears
// nearby - confirm ownership.
215 TMVA::DataLoader *seeddl =
new TMVA::DataLoader(xbitset.to_string());
// Register the variables (all declared as type 'F') on the seed loader.
218 for (UInt_t index = 0; index < nbits; index++) {
220 seeddl->AddVariable(varNames[index],
'F');
// DataLoaderCopy(destination, source): presumably transfers the remaining
// dataset configuration from the user's loader - confirm.
224 DataLoaderCopy(seeddl, fDataLoader.get());
// Book, train, test and evaluate the method on the seed loader.
227 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
230 fClassifier->TrainAllMethods();
231 fClassifier->TestAllMethods();
232 fClassifier->EvaluateAllMethods();
// ROC integral of the seed; reference value for the sub-seeds.
235 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
// Clear the factory so it can be reused for the next training.
238 fClassifier->DeleteAllMethods();
239 fClassifier->fMethodsMap.clear();
// Visit every bit position; y is the sub-seed for position i (presumably x
// with bit i cleared - confirm).
241 for (uint32_t i = 0; i < NBITS; ++i) {
244 std::bitset<NBITS> ybitset(y);
// 0.693147 is ln 2, so this is log2(x - y): the index of the bit that differs
// between seed and sub-seed.
// NOTE(review): the floating-point result is truncated when used as a vector
// index below; rounding error could select the wrong slot, and an integer bit
// scan would be exact.
248 Double_t ny = log(x - y) / 0.693147;
// Direct assignment of the seed ROC integral minus 0.5.
250 importances[ny] = SROC - 0.5;
// Sub-seed loader containing only the variables whose bit is set in y.
255 TMVA::DataLoader *subseeddl =
new TMVA::DataLoader(ybitset.to_string());
257 for (UInt_t index = 0; index < nbits; index++) {
258 if (ybitset[index]) subseeddl->AddVariable(varNames[index],
'F');
262 DataLoaderCopy(subseeddl,fDataLoader.get());
// Book, train, test and evaluate the method on the sub-seed loader.
265 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
268 fClassifier->TrainAllMethods();
269 fClassifier->TestAllMethods();
270 fClassifier->EvaluateAllMethods();
// ROC drop caused by removing the variable is credited to that variable.
273 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
274 importances[ny] += SROC - SSROC;
277 fClassifier->DeleteAllMethods();
278 fClassifier->fMethodsMap.clear();
// Express each importance as a percentage of the total.
281 Float_t normalization = 0.0;
282 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
284 for(UInt_t i=0;i<nbits;i++){
286 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
// Re-store the value as text with a trailing " % ".
288 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<TString>(varNames[i])+
" % ";
// fResults takes shared ownership of the histogram built by GetImportance().
290 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
/// "Random" variable-importance evaluation.
/// Draws `seeds` random variable subsets; for each one a seed classifier and
/// its sub-seed classifiers are trained, and the ROC drop (SROC - SSROC) is
/// credited to the variable whose bit differs. Results are stored in fResults
/// as percentages and as a histogram.
/// \param seeds number of random subsets to draw.
296 void TMVA::VariableImportance::EvaluateImportanceRandom(UInt_t seeds)
298 for (
auto &meth : fMethods) {
// Method description reused for every training below.
300 TString methodName = meth.GetValue<TString>(
"MethodName");
301 TString methodTitle = meth.GetValue<TString>(
"MethodTitle");
302 TString methodOptions = meth.GetValue<TString>(
"MethodOptions");
// NOTE(review): generator is allocated with raw new and no matching delete
// appears nearby. ROOT documents seed 0 as "choose a unique seed", so runs
// are presumably not reproducible - confirm both points.
304 TRandom3 *rangen =
new TRandom3(0);
// One bit per input variable.
310 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
311 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
// Number of distinct variable subsets (2^nbits), computed through a double.
313 ULong_t range = pow(2, nbits);
// Per-variable importance accumulator, indexed like varNames.
316 std::vector<Float_t> importances(nbits);
317 Float_t importances_norm = 0;
319 for (UInt_t i = 0; i < nbits; i++)
// One iteration per random seed subset.
326 for (UInt_t n = 0; n < seeds; n++) {
// NOTE(review): TRandom::Integer(range) can presumably return 0, which would
// be an empty variable set - confirm how that case is handled.
327 x = rangen->Integer(range);
329 std::bitset<NBITS> xbitset(x);
// Seed DataLoader named after the subset's bit string; raw new - confirm ownership.
334 TMVA::DataLoader *seeddl =
new TMVA::DataLoader(xbitset.to_string());
// Register only the variables whose bit is set in x.
337 for (UInt_t index = 0; index < nbits; index++) {
338 if (xbitset[index]) seeddl->AddVariable(varNames[index],
'F');
// DataLoaderCopy(destination, source): presumably transfers the remaining
// dataset configuration from the user's loader - confirm.
342 DataLoaderCopy(seeddl,fDataLoader.get());
// Book, train, test and evaluate the method on the seed subset.
345 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
348 fClassifier->TrainAllMethods();
349 fClassifier->TestAllMethods();
350 fClassifier->EvaluateAllMethods();
// ROC integral of the seed subset; reference value for the sub-seeds.
353 SROC = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
// Clear the factory so it can be reused for the next training.
356 fClassifier->DeleteAllMethods();
357 fClassifier->fMethodsMap.clear();
// NOTE(review): the bound is the literal 32, while the equivalent loops in
// EvaluateImportanceShort/All use NBITS; the two only agree while NBITS is 32.
359 for (uint32_t i = 0; i < 32; ++i) {
362 std::bitset<NBITS> ybitset(y);
// 0.693147 is ln 2, so this is log2(x - y): the index of the differing bit.
// NOTE(review): the floating-point result is truncated when used as a vector
// index below; rounding error could select the wrong slot.
366 Double_t ny = log(x - y) / 0.693147;
// Direct assignment of the seed ROC integral minus 0.5, also added to the running norm.
368 importances[ny] = SROC - 0.5;
369 importances_norm += importances[ny];
// Sub-seed loader containing only the variables whose bit is set in y.
374 TMVA::DataLoader *subseeddl =
new TMVA::DataLoader(ybitset.to_string());
376 for (UInt_t index = 0; index < nbits; index++) {
377 if (ybitset[index]) subseeddl->AddVariable(varNames[index],
'F');
381 DataLoaderCopy(subseeddl,fDataLoader.get());
// Book, train, test and evaluate the method on the sub-seed subset.
384 fClassifier->BookMethod(subseeddl, methodName, methodTitle, methodOptions);
387 fClassifier->TrainAllMethods();
388 fClassifier->TestAllMethods();
389 fClassifier->EvaluateAllMethods();
// ROC drop caused by removing the variable is credited to that variable.
392 SSROC = fClassifier->GetROCIntegral(ybitset.to_string(), methodTitle);
393 importances[ny] += SROC - SSROC;
396 fClassifier->DeleteAllMethods();
397 fClassifier->fMethodsMap.clear();
// Express each importance as a percentage of the total.
402 Float_t normalization = 0.0;
403 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
405 for(UInt_t i=0;i<nbits;i++){
407 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
// Re-store the value as text with a trailing " % ".
409 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<TString>(varNames[i])+
" % ";
// fResults takes shared ownership of the histogram built by GetImportance().
411 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));
/// "All" (exhaustive) variable-importance evaluation.
/// Trains one classifier for every non-empty variable subset x in [1, 2^nbits)
/// and stores its ROC integral in ROC[x]; a second pass over the subsets then
/// accumulates per-variable importances. Results are stored in fResults as
/// percentages and as a histogram.
/// The number of trainings grows as 2^nbits.
418 void TMVA::VariableImportance::EvaluateImportanceAll()
420 for (
auto &meth : fMethods) {
// Method description reused for every training below.
421 TString methodName = meth.GetValue<TString>(
"MethodName");
422 TString methodTitle = meth.GetValue<TString>(
"MethodTitle");
423 TString methodOptions = meth.GetValue<TString>(
"MethodOptions");
// One bit per input variable.
429 const UInt_t nbits = fDataLoader->GetDefaultDataSetInfo().GetNVariables();
430 std::vector<TString> varNames = fDataLoader->GetDefaultDataSetInfo().GetListOfVariables();
// Number of distinct variable subsets (2^nbits), computed through a double.
432 ULong_t range = pow(2, nbits);
// Per-variable importance accumulator, indexed like varNames.
435 std::vector<Float_t> importances(nbits);
// ROC integral per subset, indexed by the subset's bit mask; slot 0 (the
// empty set) keeps its zero-initialized value because training starts at x = 1.
438 std::vector<Float_t> ROC(range);
440 for (UInt_t i = 0; i < nbits; i++)
// First pass: train on every non-empty subset.
444 for (x = 1; x < range; x++) {
446 std::bitset<NBITS> xbitset(x);
// Loader named after the subset's bit string; raw new - confirm ownership.
451 TMVA::DataLoader *seeddl =
new TMVA::DataLoader(xbitset.to_string());
// Register only the variables whose bit is set in x.
454 for (UInt_t index = 0; index < nbits; index++) {
455 if (xbitset[index]) seeddl->AddVariable(varNames[index],
'F');
// DataLoaderCopy(destination, source): presumably transfers the remaining
// dataset configuration from the user's loader - confirm.
458 DataLoaderCopy(seeddl,fDataLoader.get());
// Re-apply the original signal/background cuts and split options.
// NOTE(review): the Short and Random variants do not make this call in the
// lines shown - confirm whether the difference is intended.
460 seeddl->PrepareTrainingAndTestTree(fDataLoader->GetDefaultDataSetInfo().GetCut(
"Signal"), fDataLoader->GetDefaultDataSetInfo().GetCut(
"Background"), fDataLoader->GetDefaultDataSetInfo().GetSplitOptions());
// Book, train, test and evaluate the method on this subset.
463 fClassifier->BookMethod(seeddl, methodName, methodTitle, methodOptions);
466 fClassifier->TrainAllMethods();
467 fClassifier->TestAllMethods();
468 fClassifier->EvaluateAllMethods();
// Remember the ROC integral of subset x.
471 ROC[x] = fClassifier->GetROCIntegral(xbitset.to_string(), methodTitle);
// Clear the factory so it can be reused for the next subset.
474 fClassifier->DeleteAllMethods();
475 fClassifier->fMethodsMap.clear();
// Second pass over all subsets; no training calls appear in it, so SROC and
// SSROC are presumably looked up in ROC[] - confirm.
479 for ( x = 0; x <range ; x++)
482 for (uint32_t i = 0; i < NBITS; ++i) {
485 std::bitset<NBITS> ybitset(y);
// 0.693147 is ln 2, so this is log2(x - y): the index of the differing bit.
// NOTE(review): stored as Float_t here (Double_t in the sibling functions)
// and truncated when used as a vector index; rounding error could select the
// wrong slot.
487 Float_t ny = log(x - y) / 0.693147;
// Direct assignment of the seed ROC integral minus 0.5.
489 importances[ny] = SROC - 0.5;
// ROC drop caused by removing the variable is credited to that variable.
495 importances[ny] += SROC - SSROC;
// Express each importance as a percentage of the total.
500 Float_t normalization = 0.0;
501 for (UInt_t i = 0; i < nbits; i++) normalization += importances[i];
503 for(UInt_t i=0;i<nbits;i++){
505 fResults.fImportanceValues[varNames[i]]=(100.0 * importances[i] / normalization);
// Re-store the value as text with a trailing " % ".
507 fResults.fImportanceValues[varNames[i]]=fResults.fImportanceValues.GetValue<TString>(varNames[i])+
" % ";
// fResults takes shared ownership of the histogram built by GetImportance().
509 fResults.fImportanceHist = std::shared_ptr<TH1F>(GetImportance(nbits,importances,varNames));