Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
Tools.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : Tools *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Global auxiliary applications and data treatment routines *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
17  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * U. of Victoria, Canada *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_Tools
30 #define ROOT_TMVA_Tools
31 
32 //////////////////////////////////////////////////////////////////////////
33 // //
34 // Tools (namespace) //
35 // //
36 // Global auxiliary applications and data treatment routines //
37 // //
38 //////////////////////////////////////////////////////////////////////////
39 
40 #include <vector>
41 #include <sstream>
42 #include <iostream>
43 #include <iomanip>
44 #if __cplusplus > 199711L
45 #include <atomic>
46 #endif
47 
48 #include "TXMLEngine.h"
49 
50 #include "TMatrixDSymfwd.h"
51 
52 #include "TMatrixDfwd.h"
53 
54 #include "TVectorDfwd.h"
55 
56 #include "TVectorDfwd.h"
57 
58 #include "TMVA/Types.h"
59 
61 
62 #include "TString.h"
63 
64 #include "TMVA/MsgLogger.h"
65 
66 class TList;
67 class TTree;
68 class TH1;
69 class TH2;
70 class TH2F;
71 class TSpline;
72 class TXMLEngine;
73 
74 namespace TMVA {
75 
76  class Event;
77  class PDF;
78  class MsgLogger;
79 
80  class Tools {
81 
82  private:
83 
84  Tools();
85 
86  public:
87 
88  // destructor
89  ~Tools();
90 
91  // accessor to single instance
92  static Tools& Instance();
93  static void DestroyInstance();
94 
95 
96  template <typename T> Double_t Mean(Long64_t n, const T *a, const Double_t *w=0);
97  template <typename Iterator, typename WeightIterator> Double_t Mean ( Iterator first, Iterator last, WeightIterator w);
98 
99  template <typename T> Double_t RMS(Long64_t n, const T *a, const Double_t *w=0);
100  template <typename Iterator, typename WeightIterator> Double_t RMS(Iterator first, Iterator last, WeightIterator w);
101 
102 
103  // simple statistics operations on tree entries
104  void ComputeStat( const std::vector<TMVA::Event*>&,
105  std::vector<Float_t>*,
106  Double_t&, Double_t&, Double_t&,
107  Double_t&, Double_t&, Double_t&, Int_t signalClass,
108  Bool_t norm = kFALSE );
109 
110  // compute variance from sums
111  inline Double_t ComputeVariance( Double_t sumx2, Double_t sumx, Int_t nx );
112 
113  // creates histograms normalized to one
114  TH1* projNormTH1F( TTree* theTree, const TString& theVarName,
115  const TString& name, Int_t nbins,
116  Double_t xmin, Double_t xmax, const TString& cut );
117 
118  // normalize histogram by its integral
119  Double_t NormHist( TH1* theHist, Double_t norm = 1.0 );
120 
121  // parser for TString phrase with items separated by a character
122  TList* ParseFormatLine( TString theString, const char * sep = ":" );
123 
124  // parse option string for ANN methods
125  std::vector<Int_t>* ParseANNOptionString( TString theOptions, Int_t nvar,
126  std::vector<Int_t>* nodes );
127 
128  // returns the square-root of a symmetric matrix: symMat = sqrtMat*sqrtMat
129  TMatrixD* GetSQRootMatrix( TMatrixDSym* symMat );
130 
131  // returns the covariance matrix of of the different classes (and the sum)
132  // given the event sample
133  std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<Event*>& events, Int_t maxCls, VariableTransformBase* transformBase=0 );
134  std::vector<TMatrixDSym*>* CalcCovarianceMatrices( const std::vector<const Event*>& events, Int_t maxCls, VariableTransformBase* transformBase=0 );
135 
136 
137  // turns covariance into correlation matrix
138  const TMatrixD* GetCorrelationMatrix( const TMatrixD* covMat );
139 
140  // check spline quality by comparison with initial histogram
141  Bool_t CheckSplines( const TH1*, const TSpline* );
142 
143  // normalization of variable output
144  Double_t NormVariable( Double_t x, Double_t xmin, Double_t xmax );
145 
146  // return separation of two histograms
147  Double_t GetSeparation( TH1* S, TH1* B ) const;
148  Double_t GetSeparation( const PDF& pdfS, const PDF& pdfB ) const;
149 
150  // vector rescaling
151  std::vector<Double_t> MVADiff( std::vector<Double_t>&, std::vector<Double_t>& );
152  void Scale( std::vector<Double_t>&, Double_t );
153  void Scale( std::vector<Float_t>&, Float_t );
154 
155  // re-arrange a vector of arrays (vectors) in a way such that the first array
156  // is ordered, and the other arrays reshuffled accordingly
157  void UsefulSortDescending( std::vector< std::vector<Double_t> >&, std::vector<TString>* vs = 0 );
158  void UsefulSortAscending ( std::vector< std::vector<Double_t> >&, std::vector<TString>* vs = 0 );
159 
160  void UsefulSortDescending( std::vector<Double_t>& );
161  void UsefulSortAscending ( std::vector<Double_t>& );
162 
163  Int_t GetIndexMaxElement ( std::vector<Double_t>& );
164  Int_t GetIndexMinElement ( std::vector<Double_t>& );
165 
166  // check if input string contains regular expression
167  Bool_t ContainsRegularExpression( const TString& s );
168  TString ReplaceRegularExpressions( const TString& s, const TString& replace = "+" );
169 
170  // routines for formatted output -----------------
171  void FormattedOutput( const std::vector<Double_t>&, const std::vector<TString>&,
172  const TString titleVars, const TString titleValues, MsgLogger& logger,
173  TString format = "%+1.3f" );
174  void FormattedOutput( const TMatrixD&, const std::vector<TString>&, MsgLogger& logger );
175  void FormattedOutput( const TMatrixD&, const std::vector<TString>& vert, const std::vector<TString>& horiz,
176  MsgLogger& logger );
177 
178  void WriteFloatArbitraryPrecision( Float_t val, std::ostream& os );
179  void ReadFloatArbitraryPrecision ( Float_t& val, std::istream& is );
180 
181  // for histogramming
182  TString GetXTitleWithUnit( const TString& title, const TString& unit );
183  TString GetYTitleWithUnit( const TH1& h, const TString& unit, Bool_t normalised );
184 
185  // Mutual Information method for non-linear correlations estimates in 2D histogram
186  // Author: Moritz Backes, Geneva (2009)
187  Double_t GetMutualInformation( const TH2F& );
188 
189  // Correlation Ratio method for non-linear correlations estimates in 2D histogram
190  // Author: Moritz Backes, Geneva (2009)
191  Double_t GetCorrelationRatio( const TH2F& );
192  TH2F* TransposeHist ( const TH2F& );
193 
194  // check if "silent" or "verbose" option in configuration string
195  Bool_t CheckForSilentOption ( const TString& ) const;
196  Bool_t CheckForVerboseOption( const TString& ) const;
197 
198  // color information
199  const TString& Color( const TString& );
200 
201  // print welcome message (to be called from, eg, .TMVAlogon)
202  enum EWelcomeMessage { kStandardWelcomeMsg = 1,
203  kIsometricWelcomeMsg,
204  kBlockWelcomeMsg,
205  kLeanWelcomeMsg,
206  kLogoWelcomeMsg,
207  kSmall1WelcomeMsg,
208  kSmall2WelcomeMsg,
209  kOriginalWelcomeMsgColor,
210  kOriginalWelcomeMsgBW };
211 
212  // print TMVA citation (to be called from, eg, .TMVAlogon)
213  enum ECitation { kPlainText = 1,
214  kBibTeX,
215  kLaTeX,
216  kHtmlLink };
217 
218  void TMVAWelcomeMessage();
219  void TMVAWelcomeMessage( MsgLogger& logger, EWelcomeMessage m = kStandardWelcomeMsg );
220  void TMVAVersionMessage( MsgLogger& logger );
221  void ROOTVersionMessage( MsgLogger& logger );
222 
223  void TMVACitation( MsgLogger& logger, ECitation citType = kPlainText );
224 
225  // string tools
226 
227  std::vector<TString> SplitString( const TString& theOpt, const char separator ) const;
228 
229  // variables
230  const TString fRegexp;
231  mutable MsgLogger* fLogger;
232  MsgLogger& Log() const { return *fLogger; }
233 #if __cplusplus > 199711L
234  static std::atomic<Tools*> fgTools;
235 #else
236  static Tools* fgTools;
237 #endif
238 
239  // xml tools
240 
241  TString StringFromInt ( Long_t i );
242  TString StringFromDouble ( Double_t d );
243  void WriteTMatrixDToXML ( void* node, const char* name, TMatrixD* mat );
244  void WriteTVectorDToXML ( void* node, const char* name, TVectorD* vec );
245  void ReadTMatrixDFromXML( void* node, const char* name, TMatrixD* mat );
246  void ReadTVectorDFromXML( void* node, const char* name, TVectorD* vec );
247  Bool_t HistoHasEquidistantBins(const TH1& h);
248 
249  Bool_t HasAttr ( void* node, const char* attrname );
250  template<typename T>
251  inline void ReadAttr ( void* node, const char* , T& value );
252  void ReadAttr ( void* node, const char* attrname, TString& value );
253  void ReadAttr(void *node, const char *, float &value);
254  void ReadAttr(void *node, const char *, int &value);
255  void ReadAttr(void *node, const char *, short &value);
256 
257  template<typename T>
258  void AddAttr ( void* node, const char* , const T& value, Int_t precision = 16 );
259  void AddAttr ( void* node, const char* attrname, const char* value );
260  void* AddChild ( void* parent, const char* childname, const char* content = 0, bool isRootNode = false );
261  Bool_t AddRawLine ( void* node, const char * raw );
262  Bool_t AddComment ( void* node, const char* comment );
263 
264  void* GetParent( void* child);
265  void* GetChild ( void* parent, const char* childname=0 );
266  void* GetNextChild( void* prevchild, const char* childname=0 );
267  const char* GetContent ( void* node );
268  const char* GetName ( void* node );
269 
270  TXMLEngine& xmlengine() { return *fXMLEngine; }
271  int xmlenginebuffersize() { return fXMLBufferSize;}
272  void SetXMLEngineBufferSize(int buffer) { fXMLBufferSize = buffer; }
273  TXMLEngine* fXMLEngine;
274 
275  TH1* GetCumulativeDist( TH1* h);
276 
277  private:
278 
279  int fXMLBufferSize = 10000000;
280  // utilities for correlation ratio
281  Double_t GetYMean_binX( const TH2& , Int_t bin_x );
282 
283  }; // Common tools
284 
285  Tools& gTools(); // global accessor
286 
287  //
288  // Adapts a TRandom random number generator to the interface of the ones in the
289  // standard library (STL) so that TRandom derived generators can be used with
290  // STL algorithms such as `std::shuffle`.
291  //
292  // Example:
293  // ```
294  // std::vector<double> v {0, 1, 2, 3, 4, 5};
295  // TRandom3StdEngine rng(seed);
296  // std::shuffle(v.begin(), v.end(), rng);
297  // ```
298  //
299  // Or at a lower level:
300  // ```
301  // std::vector<double> v {0, 1, 2, 3, 4, 5};
302  // RandomGenerator<TRandom3> rng(seed);
303  // std::shuffle(v.begin(), v.end(), rng);
304  // ```
305  //
306  template <typename TRandomLike, typename UIntType = UInt_t, UIntType max_val = kMaxUInt>
307  class RandomGenerator {
308  public:
309  using result_type = UIntType;
310 
311  RandomGenerator(UIntType s = 0) { fRandom.SetSeed(s); }
312 
313  static constexpr UIntType min() { return 0; }
314  static constexpr UIntType max() { return max_val; }
315 
316  void seed(UIntType s = 0) { fRandom.SetSeed(s); }
317 
318  UIntType operator()() { return fRandom.Integer(max()); }
319 
320  void discard(unsigned long long z)
321  {
322  double r;
323  for (unsigned long long i = 0; i < z; ++i)
324  r = fRandom.Rndm();
325  (void) r; /* avoid unused variable warning */
326  }
327 
328  private:
329  TRandomLike fRandom; // random generator
330  };
331 
332 } // namespace TMVA
333 
334 ////////////////////////////////////////////////////////////////////////////////
335 /// read attribute from xml
336 
337 template<typename T> void TMVA::Tools::ReadAttr( void* node, const char* attrname, T& value )
338 {
339  // read attribute from xml
340  const char *val = xmlengine().GetAttr(node, attrname);
341  if (val == 0) {
342  const char *nodename = xmlengine().GetNodeName(node);
343  Log() << kFATAL << "Trying to read non-existing attribute '" << attrname << "' from xml node '" << nodename << "'"
344  << Endl;
345  }
346  std::stringstream s(val);
347  // coverity[tainted_data_argument]
348  s >> value;
349 }
350 
351 ////////////////////////////////////////////////////////////////////////////////
352 /// add attribute to xml
353 
354 template<typename T>
355 void TMVA::Tools::AddAttr( void* node, const char* attrname, const T& value, Int_t precision )
356 {
357  std::stringstream s;
358  s.precision( precision );
359  s << std::scientific << value;
360  AddAttr( node, attrname, s.str().c_str() );
361 }
362 
363 ////////////////////////////////////////////////////////////////////////////////
364 /// compute variance from given sums
365 
366 inline Double_t TMVA::Tools::ComputeVariance( Double_t sumx2, Double_t sumx, Int_t nx )
367 {
368  if (nx<2) return 0;
369  return (sumx2 - ((sumx*sumx)/static_cast<Double_t>(nx)))/static_cast<Double_t>(nx-1);
370 }
371 
372 #endif