TMLPAnalyzer.cxx
// @(#)root/mlp:$Id$
// Author: Christophe.Delaere@cern.ch 25/04/04

/*************************************************************************
 * Copyright (C) 1995-2003, Rene Brun and Fons Rademakers.               *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

/** \class TMLPAnalyzer

This utility class contains a set of tests useful when developing
a neural network.
It allows you to check for unneeded variables, and to control
the network structure.

*/
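
// A minimal usage sketch (assuming `mlp` points to an already trained
// TMultiLayerPerceptron, and that a branch `type` tags signal (1) versus
// background (0) events):
//
//    TMLPAnalyzer ana(mlp);
//    ana.GatherInformations();                  // fill the analysis trees
//    ana.CheckNetwork();                        // print per-input sensitivities
//    ana.DrawDInputs();                         // stack the impact of each input
//    ana.DrawNetwork(0, "type==1", "type==0");  // signal vs background output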

#include "TROOT.h"
#include "TSynapse.h"
#include "TNeuron.h"
#include "TMultiLayerPerceptron.h"
#include "TMLPAnalyzer.h"
#include "TTree.h"
#include "TTreeFormula.h"
#include "TEventList.h"
#include "TH1D.h"
#include "TProfile.h"
#include "THStack.h"
#include "TLegend.h"
#include "TPad.h"
#include "TCanvas.h"
#include "TGaxis.h"
#include "TRegexp.h"
#include "TMath.h"
#include "Riostream.h"
#include <stdlib.h>
#include <float.h> // for FLT_MAX, used when booking the temporary histogram in GatherInformations()

ClassImp(TMLPAnalyzer);

////////////////////////////////////////////////////////////////////////////////
/// Destructor.

TMLPAnalyzer::~TMLPAnalyzer()
{
   delete fAnalysisTree;
   delete fIOTree;
}

////////////////////////////////////////////////////////////////////////////////
/// Returns the number of layers.

Int_t TMLPAnalyzer::GetLayers()
{
   TString fStructure = fNetwork->GetStructure();
   return fStructure.CountChar(':')+1;
}

////////////////////////////////////////////////////////////////////////////////
/// Returns the number of neurons in a given layer.
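///
/// For example, for a network with the (hypothetical) structure string
/// "x,y,z:10:5:type", GetNeurons(1) returns 3 (the comma-separated inputs),
/// GetNeurons(2) returns 10, GetNeurons(3) returns 5 and
/// GetNeurons(GetLayers()) returns 1 (a single output).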

Int_t TMLPAnalyzer::GetNeurons(Int_t layer)
{
   if(layer==1) {
      TString fStructure = fNetwork->GetStructure();
      TString input = TString(fStructure(0, fStructure.First(':')));
      return input.CountChar(',')+1;
   }
   else if(layer==GetLayers()) {
      TString fStructure = fNetwork->GetStructure();
      TString output = TString(fStructure(fStructure.Last(':') + 1,
                               fStructure.Length() - fStructure.Last(':')));
      return output.CountChar(',')+1;
   }
   else {
      Int_t cnt=1;
      TString fStructure = fNetwork->GetStructure();
      TString hidden = TString(fStructure(fStructure.First(':') + 1,
                               fStructure.Last(':') - fStructure.First(':') - 1));
      Int_t beg = 0;
      Int_t end = hidden.Index(":", beg + 1);
      Int_t num = 0;
      while (end != -1) {
         num = atoi(TString(hidden(beg, end - beg)).Data());
         cnt++;
         beg = end + 1;
         end = hidden.Index(":", beg + 1);
         if(layer==cnt) return num;
      }
      num = atoi(TString(hidden(beg, hidden.Length() - beg)).Data());
      cnt++;
      if(layer==cnt) return num;
   }
   return -1;
}

////////////////////////////////////////////////////////////////////////////////
/// Returns the formula used as input for neuron (idx) in
/// the first layer.
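///
/// For example, with inputs declared as "@pt,eta:5:out" (a hypothetical
/// structure string), GetNeuronFormula(0) returns "pt" (the leading '@',
/// which marks a normalized input, is stripped) and GetNeuronFormula(1)
/// returns "eta".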

TString TMLPAnalyzer::GetNeuronFormula(Int_t idx)
{
   TString fStructure = fNetwork->GetStructure();
   TString input = TString(fStructure(0, fStructure.First(':')));
   Int_t beg = 0;
   Int_t end = input.Index(",", beg + 1);
   TString brName;
   Int_t cnt = 0;
   while (end != -1) {
      brName = TString(input(beg, end - beg));
      if (brName[0]=='@')
         brName = brName(1,brName.Length()-1);
      beg = end + 1;
      end = input.Index(",", beg + 1);
      if(cnt==idx) return brName;
      cnt++;
   }
   brName = TString(input(beg, input.Length() - beg));
   if (brName[0]=='@')
      brName = brName(1,brName.Length()-1);
   return brName;
}

////////////////////////////////////////////////////////////////////////////////
/// Returns the name of any neuron from the input layer.

const char* TMLPAnalyzer::GetInputNeuronTitle(Int_t in)
{
   TNeuron* neuron=(TNeuron*)fNetwork->fFirstLayer[in];
   return neuron ? neuron->GetName() : "NO SUCH NEURON";
}

////////////////////////////////////////////////////////////////////////////////
/// Returns the name of any neuron from the output layer.

const char* TMLPAnalyzer::GetOutputNeuronTitle(Int_t out)
{
   TNeuron* neuron=(TNeuron*)fNetwork->fLastLayer[out];
   return neuron ? neuron->GetName() : "NO SUCH NEURON";
}

////////////////////////////////////////////////////////////////////////////////
/// Prints some information about the network to the terminal.

void TMLPAnalyzer::CheckNetwork()
{
   TString fStructure = fNetwork->GetStructure();
   std::cout << "Network with structure: " << fStructure.Data() << std::endl;
   std::cout << "inputs with low values in the differences plot may not be needed" << std::endl;
   // Checks if some input variable is not needed
   char var[64], sel[64];
   for (Int_t i = 0; i < GetNeurons(1); i++) {
      snprintf(var,64,"diff>>tmp%d",i);
      snprintf(sel,64,"inNeuron==%d",i);
      fAnalysisTree->Draw(var, sel, "goff");
      TH1F* tmp = (TH1F*)gDirectory->Get(Form("tmp%d",i));
      if (!tmp) continue;
      std::cout << GetInputNeuronTitle(i)
                << " -> " << tmp->GetMean()
                << " +/- " << tmp->GetRMS() << std::endl;
   }
}

////////////////////////////////////////////////////////////////////////////////
/// Collects information about what is useful in the network.
/// This method has to be called first when analyzing a network.
/// Fills the two analysis trees.
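///
/// Concretely, fAnalysisTree gets one entry per (test event, input neuron)
/// pair with branches inNeuron and diff, while fIOTree gets one entry per
/// test event with branches In, Out and True holding the input values, the
/// network outputs and the true values.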

void TMLPAnalyzer::GatherInformations()
{
   Double_t shift = 0.1;
   TTree* data = fNetwork->fData;
   TEventList* test = fNetwork->fTest;
   Int_t nEvents = test->GetN();
   Int_t nn = GetNeurons(1);
   Double_t* params = new Double_t[nn];
   Double_t* rms = new Double_t[nn];
   TTreeFormula** formulas = new TTreeFormula*[nn];
   Int_t* index = new Int_t[nn];
   TString formula;
   TRegexp re("{[0-9]+}$");
   Ssiz_t len = formula.Length();
   Ssiz_t pos = -1;
   Int_t i(0), j(0), k(0), l(0);
   for(i=0; i<nn; i++){
      formula = GetNeuronFormula(i);
      pos = re.Index(formula,&len);
      if(pos==-1 || len<3) {
         formulas[i] = new TTreeFormula(Form("NF%lu",(ULong_t)this),formula,data);
         index[i] = 0;
      }
      else {
         TString newformula(formula,pos);
         TString val = formula(pos+1,len-2);
         formulas[i] = new TTreeFormula(Form("NF%lu",(ULong_t)this),newformula,data);
         formula = newformula;
         index[i] = val.Atoi();
      }
      TH1D tmp("tmpb", "tmpb", 1, -FLT_MAX, FLT_MAX);
      data->Draw(Form("%s>>tmpb",formula.Data()),"","goff");
      rms[i] = tmp.GetRMS();
   }
   Int_t inNeuron = 0;
   Double_t diff = 0.;
   if(fAnalysisTree) delete fAnalysisTree;
   fAnalysisTree = new TTree("result","analysis");
   fAnalysisTree->SetDirectory(0);
   fAnalysisTree->Branch("inNeuron",&inNeuron,"inNeuron/I");
   fAnalysisTree->Branch("diff",&diff,"diff/D");
   Int_t numOutNodes=GetNeurons(GetLayers());
   Double_t *outVal=new Double_t[numOutNodes];
   Double_t *trueVal=new Double_t[numOutNodes];

   delete fIOTree;
   fIOTree=new TTree("MLP_iotree","MLP_iotree");
   fIOTree->SetDirectory(0);
   TString leaflist;
   for (i=0; i<nn; i++)
      leaflist+=Form("In%d/D:",i);
   leaflist.Remove(leaflist.Length()-1);
   fIOTree->Branch("In", params, leaflist);

   leaflist="";
   for (i=0; i<numOutNodes; i++)
      leaflist+=Form("Out%d/D:",i);
   leaflist.Remove(leaflist.Length()-1);
   fIOTree->Branch("Out", outVal, leaflist);

   leaflist="";
   for (i=0; i<numOutNodes; i++)
      leaflist+=Form("True%d/D:",i);
   leaflist.Remove(leaflist.Length()-1);
   fIOTree->Branch("True", trueVal, leaflist);
   Double_t v1 = 0.;
   Double_t v2 = 0.;
   // Loop on the events in the test sample
   for(j=0; j< nEvents; j++) {
      fNetwork->GetEntry(test->GetEntry(j));
      // Loop on the neurons to evaluate
      for(k=0; k<GetNeurons(1); k++) {
         params[k] = formulas[k]->EvalInstance(index[k]);
      }
      for(k=0; k<GetNeurons(GetLayers()); k++) {
         outVal[k] = fNetwork->Evaluate(k,params);
         trueVal[k] = ((TNeuron*)fNetwork->fLastLayer[k])->GetBranch();
      }
      fIOTree->Fill();

      // Loop on the input neurons
      for (i = 0; i < GetNeurons(1); i++) {
         inNeuron = i;
         diff = 0;
         // Loop on the neurons in the output layer
         for(l=0; l<GetNeurons(GetLayers()); l++){
            params[i] += shift*rms[i];
            v1 = fNetwork->Evaluate(l,params);
            params[i] -= 2*shift*rms[i];
            v2 = fNetwork->Evaluate(l,params);
            diff += (v1-v2)*(v1-v2);
            // reset to original value
            params[i] += shift*rms[i];
         }
         diff = TMath::Sqrt(diff);
         fAnalysisTree->Fill();
      }
   }
   delete[] params;
   delete[] rms;
   delete[] outVal;
   delete[] trueVal;
   delete[] index;
   for(i=0; i<GetNeurons(1); i++) delete formulas[i];
   delete [] formulas;
   fAnalysisTree->ResetBranchAddresses();
   fIOTree->ResetBranchAddresses();
}

////////////////////////////////////////////////////////////////////////////////
/// Draws the distribution (on the test sample) of the
/// impact on the network output of a small variation of
/// the ith input.

void TMLPAnalyzer::DrawDInput(Int_t i)
{
   char sel[64];
   snprintf(sel,64, "inNeuron==%d", i);
   fAnalysisTree->Draw("diff", sel);
}

////////////////////////////////////////////////////////////////////////////////
/// Draws the distribution (on the test sample) of the
/// impact on the network output of a small variation of
/// each input.
/// DrawDInputs() draws something that approximates the distribution of the
/// derivative of the NN w.r.t. each input. That quantity is recognized as
/// one of the measures to determine key quantities in the network.
///
/// What is done is to vary one input around its nominal value and to see
/// how the NN changes. This is done for each entry in the sample and produces
/// a distribution.
///
/// What you can learn from that is:
/// - is a variable really useful, or is my network insensitive to it?
/// - is there any risk of a big systematic effect? Is the network extremely
///   sensitive to small variations of any of my inputs?
///
/// As you might understand, this is to be considered with care and can serve
/// as input for an "educated guess" when optimizing the network.
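///
/// For reference, the quantity histogrammed here (filled by
/// GatherInformations(), which uses shift = 0.1) is, for input i,
/// \f$ d_i = \sqrt{\sum_l \left( NN_l(x_i + 0.1\,\sigma_i) - NN_l(x_i - 0.1\,\sigma_i) \right)^2} \f$,
/// where the sum runs over the output neurons l and \f$ \sigma_i \f$ is the
/// RMS of input i in the data sample.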

void TMLPAnalyzer::DrawDInputs()
{
   THStack* stack = new THStack("differences","differences (impact of variables on ANN)");
   TLegend* legend = new TLegend(0.75,0.75,0.95,0.95);
   TH1F* tmp = 0;
   char var[64], sel[64];
   for(Int_t i = 0; i < GetNeurons(1); i++) {
      snprintf(var,64, "diff>>tmp%d", i);
      snprintf(sel,64, "inNeuron==%d", i);
      fAnalysisTree->Draw(var, sel, "goff");
      tmp = (TH1F*)gDirectory->Get(Form("tmp%d",i));
      tmp->SetDirectory(0);
      tmp->SetLineColor(i+1);
      stack->Add(tmp);
      legend->AddEntry(tmp,GetInputNeuronTitle(i),"l");
   }
   stack->Draw("nostack");
   legend->Draw();
   gPad->SetLogy();
}

////////////////////////////////////////////////////////////////////////////////
/// Draws the distribution of the neural network output (for the given output
/// neuron).
/// Two distributions are drawn, for events passing respectively the "signal"
/// and "background" cuts. Only the test sample is used.
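///
/// For example (an illustrative call; `ana` stands for a TMLPAnalyzer built
/// on a trained network, `type` for a branch tagging signal events):
/// ~~~ {.cpp}
/// ana.DrawNetwork(0, "type==1", "type==0");
/// ~~~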

void TMLPAnalyzer::DrawNetwork(Int_t neuron, const char* signal, const char* bg)
{
   TTree* data = fNetwork->fData;
   TEventList* test = fNetwork->fTest;
   TEventList* current = data->GetEventList();
   data->SetEventList(test);
   THStack* stack = new THStack("__NNout_TMLPA",Form("Neural net output (neuron %d)",neuron));
   TH1F *bgh = new TH1F("__bgh_TMLPA", "NN output", 50, -0.5, 1.5);
   TH1F *sigh = new TH1F("__sigh_TMLPA", "NN output", 50, -0.5, 1.5);
   bgh->SetDirectory(0);
   sigh->SetDirectory(0);
   Int_t nEvents = 0;
   Int_t j=0;
   // build event lists for signal and background
   TEventList* signal_list = new TEventList("__tmpSig_MLPA");
   TEventList* bg_list = new TEventList("__tmpBkg_MLPA");
   data->Draw(">>__tmpSig_MLPA",signal,"goff");
   data->Draw(">>__tmpBkg_MLPA",bg,"goff");

   // fill the background
   nEvents = bg_list->GetN();
   for(j=0; j< nEvents; j++) {
      bgh->Fill(fNetwork->Result(bg_list->GetEntry(j),neuron));
   }
   // fill the signal
   nEvents = signal_list->GetN();
   for(j=0; j< nEvents; j++) {
      sigh->Fill(fNetwork->Result(signal_list->GetEntry(j),neuron));
   }
   // draws the result
   bgh->SetLineColor(kBlue);
   bgh->SetFillStyle(3008);
   bgh->SetFillColor(kBlue);
   sigh->SetLineColor(kRed);
   sigh->SetFillStyle(3003);
   sigh->SetFillColor(kRed);
   bgh->SetStats(0);
   sigh->SetStats(0);
   stack->Add(bgh);
   stack->Add(sigh);
   TLegend *legend = new TLegend(.75, .80, .95, .95);
   legend->AddEntry(bgh, "Background");
   legend->AddEntry(sigh,"Signal");
   stack->Draw("nostack");
   legend->Draw();
   // restore the default event list
   data->SetEventList(current);
   delete signal_list;
   delete bg_list;
}

////////////////////////////////////////////////////////////////////////////////
/// Create a profile of the difference of the MLP output minus the
/// true value for a given output node outnode, vs the true value for
/// outnode, for all test data events. This method is mainly useful
/// when doing regression analysis with the MLP (i.e. not classification,
/// but continuous truth values).
/// The resulting TProfile histogram is returned.
/// It is not drawn if option "goff" is specified.
/// Options are passed to TProfile::Draw.
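///
/// For example (an illustrative call; `ana` stands for a TMLPAnalyzer built
/// on a trained regression network):
/// ~~~ {.cpp}
/// TProfile *dev = ana.DrawTruthDeviation(0, "goff"); // profile for output 0, no drawing
/// ~~~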

TProfile* TMLPAnalyzer::DrawTruthDeviation(Int_t outnode /*=0*/,
                                           Option_t *option /*=""*/)
{
   if (!fIOTree) GatherInformations();
   TString pipehist=Form("MLP_truthdev_%d",outnode);
   TString drawline;
   drawline.Form("Out.Out%d-True.True%d:True.True%d>>",
                 outnode, outnode, outnode);
   fIOTree->Draw(drawline+pipehist+"(20)", "", "goff prof");
   TProfile* h=(TProfile*)gDirectory->Get(pipehist);
   h->SetDirectory(0);
   const char* title=GetOutputNeuronTitle(outnode);
   if (title) {
      h->SetTitle(Form("#Delta(output - truth) vs. truth for %s",
                       title));
      h->GetXaxis()->SetTitle(title);
      h->GetYaxis()->SetTitle(Form("#Delta(output - truth) for %s", title));
   }
   if (!strstr(option,"goff"))
      h->Draw();
   return h;
}

////////////////////////////////////////////////////////////////////////////////
/// Creates TProfiles of the difference of the MLP output minus the
/// true value vs the true value, one for each output, filled with the
/// test data events. This method is mainly useful when doing regression
/// analysis with the MLP (i.e. not classification, but continuous truth
/// values).
/// The returned THStack contains all the TProfiles. It is drawn unless
/// the option "goff" is specified.
/// Options are passed to TProfile::Draw.

THStack* TMLPAnalyzer::DrawTruthDeviations(Option_t *option /*=""*/)
{
   THStack *hs=new THStack("MLP_TruthDeviation",
                           "Deviation of MLP output from truth");

   // leg!=0 means we're drawing
   TLegend *leg=0;
   if (!option || !strstr(option,"goff"))
      leg=new TLegend(.4,.85,.95,.95,"#Delta(output - truth) vs. truth for:");

   const char* xAxisTitle=0;

   // create a profile for each output neuron,
   // adding them into the THStack and the TLegend
   for (Int_t outnode=0; outnode<GetNeurons(GetLayers()); outnode++) {
      TProfile* h=DrawTruthDeviation(outnode, "goff");
      h->SetLineColor(1+outnode);
      hs->Add(h, option);
      if (leg) leg->AddEntry(h,GetOutputNeuronTitle(outnode));
      if (!outnode)
         // X axis title is the same for all, extract it from the first one.
         xAxisTitle=h->GetXaxis()->GetTitle();
   }

   if (leg) {
      hs->Draw("nostack");
      leg->Draw();
      // gotta draw before accessing the axes
      hs->GetXaxis()->SetTitle(xAxisTitle);
      hs->GetYaxis()->SetTitle("#Delta(output - truth)");
   }

   return hs;
}

////////////////////////////////////////////////////////////////////////////////
/// Creates a profile of the difference of the MLP output outnode minus
/// the true value of outnode vs the input value innode, for all test
/// data events.
/// The resulting TProfile histogram is returned.
/// It is not drawn if option "goff" is specified.
/// Options are passed to TProfile::Draw.

TProfile* TMLPAnalyzer::DrawTruthDeviationInOut(Int_t innode,
                                                Int_t outnode /*=0*/,
                                                Option_t *option /*=""*/)
{
   if (!fIOTree) GatherInformations();
   TString pipehist=Form("MLP_truthdev_i%d_o%d", innode, outnode);
   TString drawline;
   drawline.Form("Out.Out%d-True.True%d:In.In%d>>",
                 outnode, outnode, innode);
   fIOTree->Draw(drawline+pipehist+"(50)", "", "goff prof");
   TProfile* h=(TProfile*)gROOT->FindObject(pipehist);
   h->SetDirectory(0);
   const char* titleInNeuron=GetInputNeuronTitle(innode);
   const char* titleOutNeuron=GetOutputNeuronTitle(outnode);
   h->SetTitle(Form("#Delta(output - truth) of %s vs. input %s",
                    titleOutNeuron, titleInNeuron));
   h->GetXaxis()->SetTitle(Form("%s", titleInNeuron));
   h->GetYaxis()->SetTitle(Form("#Delta(output - truth) for %s",
                                titleOutNeuron));
   if (!strstr(option,"goff"))
      h->Draw(option);
   return h;
}

////////////////////////////////////////////////////////////////////////////////
/// Creates a profile of the difference of the MLP output outnode minus the
/// true value of outnode vs the input value, stacked for all inputs, for
/// all test data events.
/// The returned THStack contains all the TProfiles. It is drawn unless
/// the option "goff" is specified.
/// Options are passed to TProfile::Draw.

THStack* TMLPAnalyzer::DrawTruthDeviationInsOut(Int_t outnode /*=0*/,
                                                Option_t *option /*=""*/)
{
   TString sName;
   sName.Form("MLP_TruthDeviationIO_%d", outnode);
   const char* outputNodeTitle=GetOutputNeuronTitle(outnode);
   THStack *hs=new THStack(sName,
                           Form("Deviation of MLP output %s from truth",
                                outputNodeTitle));

   // leg!=0 means we're drawing.
   TLegend *leg=0;
   if (!option || !strstr(option,"goff"))
      leg=new TLegend(.4,.75,.95,.95,
                      Form("#Delta(output - truth) of %s vs. input for:",
                           outputNodeTitle));

   // create a profile for each input neuron,
   // adding them into the THStack and the TLegend
   Int_t numInNodes=GetNeurons(1);
   Int_t innode=0;
   for (innode=0; innode<numInNodes; innode++) {
      TProfile* h=DrawTruthDeviationInOut(innode, outnode, "goff");
      h->SetLineColor(1+innode);
      hs->Add(h, option);
      if (leg) leg->AddEntry(h,h->GetXaxis()->GetTitle());
   }

   if (leg) {
      hs->Draw("nostack");
      leg->Draw();
      // gotta draw before accessing the axes
      hs->GetXaxis()->SetTitle("Input value");
      hs->GetYaxis()->SetTitle(Form("#Delta(output - truth) for %s",
                                    outputNodeTitle));
   }

   return hs;
}