Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
regression_averagedevs.cxx
Go to the documentation of this file.
1 #include <limits>
2 
4 
5 #include "TLatex.h"
6 #include "TGraphErrors.h"
7 #include "TFrame.h"
8 
9 /*
10  This macro plots the quadratic deviation of the estimated value from the target value, averaged over the first Nevt events in the test sample (all events if Nevt=-1)
11  a) normal average
12  b) truncated average, using best 90%
13  created January 2009, Eckhard von Toerne, University of Bonn, Germany
14 */
15 
16 void TMVA::regression_averagedevs(TString dataset,TString fin, Int_t Nevt, Bool_t useTMVAStyle )
17 {
18  bool debug=false;
19  if (Nevt <0) Nevt=1000000;
20  TMVAGlob::Initialize( useTMVAStyle );
21  // checks if file with name "fin" is already open, and if not opens one
22  TFile* file = TMVAGlob::OpenFile( fin );
23  TList jobDirList;
24  TMVAGlob::GetListOfJobs((TFile*)file->GetDirectory(dataset.Data()),jobDirList);
25  if (jobDirList.GetSize()==0) {
26  cout << "error could not find jobs" << endl;
27  return;
28  }
29 
30  Bool_t __PLOT_LOGO__ = kTRUE;
31  Bool_t __SAVE_IMAGE__ = kTRUE;
32 
33  TDirectory* dir0 = (TDirectory*) (jobDirList.At(0));
34  //TDirectory* dir0 = (TDirectory*) (file->Get("InputVariables_Id"));
35  Int_t nTargets = TMVAGlob::GetNumberOfTargets( dir0);
36 
37  if (debug) cout << "found targets " << nTargets<<endl;
38  TCanvas* c=0;
39  for (Int_t itrgt = 0 ; itrgt < nTargets; itrgt++){
40  if (debug) cout << "loop targets " << itrgt<<endl;
41  TString xtit = "Method";
42  TString ytit = "Average Quadratic Deviation";
43  TString ftit = ytit + " versus " + xtit + Form(" for target %d",itrgt);
44  c = new TCanvas( Form("c%d",itrgt), ftit , 50+20*itrgt, 10*itrgt, 750, 650 );
45 
46  // global style settings
47  c->SetGrid();
48  c->SetTickx(1);
49  c->SetTicky(0);
50  c->SetTopMargin(0.28);
51  c->SetBottomMargin(0.1);
52 
53  TString hNameRef(Form("regression_average_devs_target%d",itrgt));
54 
55  const Int_t maxMethods = 100;
56  // const Int_t maxTargets = 100;
57  Float_t m[4][maxMethods]; // h0 train-all, h1 train-90%, h2 test-all, h3 test-90%
58  Float_t em[4][maxMethods];
59  Float_t x[4][maxMethods];
60  Float_t ex[4][maxMethods];
61 
62  TIter next(&jobDirList);
63  Float_t mymax=0., mymin=std::numeric_limits<float>::max();
64  TString mvaNames[maxMethods];
65  TDirectory *jobDir;
66  Int_t nMethods = 0;
67  // loop over all methods
68  while ( (jobDir = (TDirectory*)next()) ) {
69  TString methodTitle;
70  TMVAGlob::GetMethodTitle(methodTitle,jobDir);
71  mvaNames[nMethods]=methodTitle;
72  if (debug) cout << "--- Found directory for method: " << methodTitle << endl;
73  TIter keyIt(jobDir->GetListOfKeys());
74  TKey *histKey;
75  while ( (histKey = (TKey*)keyIt()) ) {
76  if (histKey->ReadObj()->InheritsFrom("TH1F") ){
77  TString s(histKey->ReadObj()->GetName());
78  if( !s.Contains("Quadr_Dev") ) continue;
79  if( !s.Contains(Form("target_%d_",itrgt))) continue;
80  Int_t ihist = 0 ;
81  if( !s.Contains("best90perc") && s.Contains("train")) ihist=0;
82  if( s.Contains("best90perc") && s.Contains("train")) ihist=1;
83  if( !s.Contains("best90perc") && s.Contains("test")) ihist=2;
84  if( s.Contains("best90perc") && s.Contains("test")) ihist=3;
85  if (debug) cout <<"using histogram" << s << ", ihist="<<ihist<<endl;
86  TH1F* h = (TH1F*) (histKey->ReadObj());
87  m[ihist][nMethods] = sqrt(h->GetMean());
88  em[ihist][nMethods] = h->GetRMS()/(sqrt(h->GetEntries())*2.*h->GetMean());
89  x[ihist][nMethods] = nMethods+0.44+0.12*ihist;
90  ex[ihist][nMethods] = 0.001;
91  mymax= m[ihist][nMethods] > mymax ? m[ihist][nMethods] : mymax;
92  mymin= m[ihist][nMethods] < mymin ? m[ihist][nMethods] : mymin;
93  if (debug) cout << "m"<< ihist << "="<<m[ihist][nMethods]<<endl;
94  }
95  }
96  nMethods++;
97  }
98  TH1F* haveragedevs= new TH1F(Form("haveragedevs%d",itrgt),ftit,nMethods,0.,nMethods);
99  for (int i=0;i<nMethods;i++) haveragedevs->GetXaxis()->SetBinLabel(i+1, mvaNames[i]);
100  haveragedevs->SetStats(0);
101  TGraphErrors* graphTrainAv= new TGraphErrors(nMethods,x[0],m[0],ex[0],em[0]);
102  TGraphErrors* graphTruncTrainAv= new TGraphErrors(nMethods,x[1],m[1],ex[1],em[1]);
103  TGraphErrors* graphTestAv= new TGraphErrors(nMethods,x[2],m[2],ex[2],em[2]);
104  TGraphErrors* graphTruncTestAv= new TGraphErrors(nMethods,x[3],m[3],ex[3],em[3]);
105 
106  Double_t xmax = 1.2 * mymax;
107  Double_t xmin = 0.8 * mymin - (mymax - mymin)*0.05;
108  Double_t xheader = 0.2;
109  Double_t yheader = xmax*0.92;
110  xmin = xmin > 0.? xmin : 0.;
111  if (mymin > 1.e-20 && log10(mymax/mymin)>1.5){
112  c->SetLogy();
113  cout << "--- result differ significantly using log scale for display of regression results"<< endl;
114  xmax = 1.5 * xmax;
115  xmin = 0.75 * mymin;
116  yheader = xmax*0.78;
117  }
118  Float_t x0L = 0.03, y0H = 0.91;
119  Float_t dxL = 0.457-x0L, dyH = 0.14;
120  // TLegend *legend = new TLegend( x0L, y0H-dyH, x0L+dxL, y0H , "Average Deviation = (#sum_{evts} (f_{MVA} - f_{target})^{2} )^{1/2}");
121  TLegend *legend = new TLegend( x0L, y0H-dyH, x0L+dxL, y0H );
122  legend->SetTextSize( 0.035 );
123  legend->SetTextAlign(12);
124  legend->SetMargin( 0.1 );
125 
126  TH1F *hr = c->DrawFrame(-1.,0.,nMethods+1, xmax);
127  cout << endl;
128  cout << "Training: Average Deviation between target " << itrgt <<" and estimate" << endl;
129  cout << Form("%-15s%-15s%-15s", "Method","Average Dev.","trunc. Aver.(90%)") <<endl;
130  for (int i=0;i<nMethods;i++){
131  cout << Form("%-15s:%#10.3g%#10.3g",
132  (const char*)mvaNames[i], m[0][i],m[1][i])<<endl;
133  // cout << mvaNames[i] << " " << m[0][i]<< " "<< m[1][i]<<endl;
134  hr->GetXaxis()->SetBinLabel(i+1," ");
135  }
136  cout << endl;
137  cout << "Testing: Average Deviation between target " << itrgt <<" and estimate" << endl;
138  cout << Form("%-15s%-15s%-15s", "Method","Average Dev.","trunc. Aver.(90%)") <<endl;
139  for (int i=0;i<nMethods;i++){
140  cout << Form("%-15s:%#10.3g%#10.3g",
141  (const char*)mvaNames[i], m[2][i],m[3][i])<<endl;
142  //cout << mvaNames[i] << " " << m[2][i]<< " "<< m[3][i]<<endl;
143  }
144 
145  haveragedevs->SetMinimum(xmin);
146  haveragedevs->SetMaximum(xmax);
147  haveragedevs->SetXTitle("Method");
148  haveragedevs->SetYTitle("Deviation from target");
149  haveragedevs->Draw();
150  c->GetFrame()->SetFillColor(21);
151  c->GetFrame()->SetBorderSize(12);
152  graphTrainAv->SetMarkerSize(1.);
153  graphTrainAv->SetMarkerColor(kBlue);
154  graphTrainAv->SetMarkerStyle(25);
155  graphTrainAv->Draw("P");
156 
157  graphTruncTrainAv->SetMarkerSize(1.);
158  graphTruncTrainAv->SetMarkerColor(kBlack);
159  graphTruncTrainAv->SetMarkerStyle(25);
160  graphTruncTrainAv->Draw("P");
161 
162  graphTestAv->SetMarkerSize(1.);
163  graphTestAv->SetMarkerColor(kBlue);
164  graphTestAv->SetMarkerStyle(21);
165  graphTestAv->Draw("P");
166 
167  graphTruncTestAv->SetMarkerSize(1.);
168  graphTruncTestAv->SetMarkerColor(kBlack);
169  graphTruncTestAv->SetMarkerStyle(21);
170  graphTruncTestAv->Draw("P");
171  legend->AddEntry(graphTrainAv,TString("Training Sample, Average Deviation"),"p");
172  legend->AddEntry(graphTruncTrainAv,TString("Training Sample, truncated Average Dev. (best 90%)"),"p");
173  legend->AddEntry(graphTestAv,TString("Test Sample, Average Deviation"),"p");
174  legend->AddEntry(graphTruncTestAv,TString("Test Sample, truncated Average Dev. (best 90%)"),"p");
175 
176  legend->Draw();
177  TLatex legHeader;
178  legHeader.SetTextSize(0.035);
179  legHeader.SetTextAlign(12);
180  //legHeader.DrawLatex(x0L, y0H+0.01, "Average Deviation = (#sum (_{ } f_{MVA} - f_{target})^{2} )^{1/2}");
181  legHeader.DrawLatex(xheader, yheader, "Average Deviation = (#sum (_{ } f_{MVA} - f_{target})^{2} )^{1/2}");
182  // ============================================================
183 
184  if (__PLOT_LOGO__) TMVAGlob::plot_logo();
185  // ============================================================
186 
187  c->Update();
188  TString fname = dataset+"/plots/" + hNameRef;
189  if (__SAVE_IMAGE__) TMVAGlob::imgconv( c, fname );
190  } // end loop itrgt
191  return;
192 }
193