Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RuleFitAPI.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Joerg Stelzer, Fredrik Tegenfeldt, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : RuleFitAPI *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Interface to Friedman's RuleFit method *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Fredrik Tegenfeldt <Fredrik.Tegenfeldt@cern.ch> - Iowa State U., USA *
16  * Helge Voss <Helge.Voss@cern.ch> - MPI-KP Heidelberg, Ger. *
17  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
18  * *
19  * Copyright (c) 2005: *
20  * CERN, Switzerland *
21  * U. of Victoria, Canada *
22  * MPI-KP Heidelberg, Germany *
23  * LAPP, Annecy, France *
24  * *
25  * Redistribution and use in source and binary forms, with or without *
26  * modification, are permitted according to the terms listed in LICENSE *
27  * *
28  **********************************************************************************/
29 
30 #ifndef ROOT_TMVA_RuleFitAPI
31 #define ROOT_TMVA_RuleFitAPI
32 
33 //////////////////////////////////////////////////////////////////////////
34 // //
35 // RuleFitAPI //
36 // //
37 // J Friedman's RuleFit method //
38 // //
39 //////////////////////////////////////////////////////////////////////////
40 
41 #include <fstream>
42 
43 #include "TMVA/MsgLogger.h"
44 
45 namespace TMVA {
46 
47  class MethodRuleFit;
48  class RuleFit;
49 
50  class RuleFitAPI {
51 
52  public:
53 
54  RuleFitAPI( const TMVA::MethodRuleFit *rfbase, TMVA::RuleFit *rulefit, EMsgType minType );
55 
56  virtual ~RuleFitAPI();
57 
58  // welcome message
59  void WelcomeMessage();
60 
61  // message on howto get the binary
62  void HowtoSetupRF();
63 
64  // Set RuleFit working directory
65  void SetRFWorkDir(const char * wdir);
66 
67  // Check RF work dir - aborts if it fails
68  void CheckRFWorkDir();
69 
70  // run rf_go.exe in various modes
71  inline void TrainRuleFit();
72  inline void TestRuleFit();
73  inline void VarImp();
74 
75  // read result into MethodRuleFit
76  Bool_t ReadModelSum();
77 
78  // Get working directory
79  const TString GetRFWorkDir() const { return fRFWorkDir; }
80 
81  protected:
82 
83  enum ERFMode { kRfRegress=1, kRfClass=2 }; // RuleFit modes, default=Class
84  enum EModel { kRfLinear=0, kRfRules=1, kRfBoth=2 }; // models, default=Both (rules+linear)
85  enum ERFProgram { kRfTrain=0, kRfPredict, kRfVarimp }; // rf_go.exe running mode
86 
87  // integer parameters
88  typedef struct {
89  Int_t mode;
90  Int_t lmode;
91  Int_t n;
92  Int_t p;
93  Int_t max_rules;
94  Int_t tree_size;
95  Int_t path_speed;
96  Int_t path_xval;
97  Int_t path_steps;
98  Int_t path_testfreq;
99  Int_t tree_store;
100  Int_t cat_store;
101  } IntParms;
102 
103  // float parameters
104  typedef struct {
105  Float_t xmiss;
106  Float_t trim_qntl;
107  Float_t huber;
108  Float_t inter_supp;
109  Float_t memory_par;
110  Float_t samp_fract;
111  Float_t path_inc;
112  Float_t conv_fac;
113  } RealParms;
114 
115  // setup
116  void InitRuleFit();
117  void FillRealParmsDef();
118  void FillIntParmsDef();
119  void ImportSetup();
120  void SetTrainParms();
121  void SetTestParms();
122 
123  // run
124  Int_t RunRuleFit();
125 
126  // set rf_go.exe running mode
127  void SetRFTrain() { fRFProgram = kRfTrain; }
128  void SetRFPredict() { fRFProgram = kRfPredict; }
129  void SetRFVarimp() { fRFProgram = kRfVarimp; }
130 
131  // handle rulefit files
132  inline TString GetRFName(TString name);
133  inline Bool_t OpenRFile(TString name, std::ofstream & f);
134  inline Bool_t OpenRFile(TString name, std::ifstream & f);
135 
136  // read/write binary files
137  inline Bool_t WriteInt(std::ofstream & f, const Int_t *v, Int_t n=1);
138  inline Bool_t WriteFloat(std::ofstream & f, const Float_t *v, Int_t n=1);
139  inline Int_t ReadInt(std::ifstream & f, Int_t *v, Int_t n=1) const;
140  inline Int_t ReadFloat(std::ifstream & f, Float_t *v, Int_t n=1) const;
141 
142  // write rf_go.exe i/o files
143  Bool_t WriteAll();
144  Bool_t WriteIntParms();
145  Bool_t WriteRealParms();
146  Bool_t WriteLx();
147  Bool_t WriteProgram();
148  Bool_t WriteRealVarImp();
149  Bool_t WriteRfOut();
150  Bool_t WriteRfStatus();
151  Bool_t WriteRuleFitMod();
152  Bool_t WriteRuleFitSum();
153  Bool_t WriteTrain();
154  Bool_t WriteVarNames();
155  Bool_t WriteVarImp();
156  Bool_t WriteYhat();
157  Bool_t WriteTest();
158 
159  // read rf_go.exe i/o files
160  Bool_t ReadYhat();
161  Bool_t ReadIntParms();
162  Bool_t ReadRealParms();
163  Bool_t ReadLx();
164  Bool_t ReadProgram();
165  Bool_t ReadRealVarImp();
166  Bool_t ReadRfOut();
167  Bool_t ReadRfStatus();
168  Bool_t ReadRuleFitMod();
169  Bool_t ReadRuleFitSum();
170  Bool_t ReadTrainX();
171  Bool_t ReadTrainY();
172  Bool_t ReadTrainW();
173  Bool_t ReadVarNames();
174  Bool_t ReadVarImp();
175 
176  private:
177  // prevent empty constructor from being used
178  RuleFitAPI();
179  const MethodRuleFit *fMethodRuleFit; // parent method - set in constructor
180  RuleFit *fRuleFit; // non const ptr to RuleFit class in MethodRuleFit
181  //
182  std::vector<Float_t> fRFYhat; // score results from test sample
183  std::vector<Float_t> fRFVarImp; // variable importances
184  std::vector<Int_t> fRFVarImpInd; // variable index
185  TString fRFWorkDir; // working directory
186  IntParms fRFIntParms; // integer parameters
187  RealParms fRFRealParms; // real parameters
188  std::vector<int> fRFLx; // variable selector
189  ERFProgram fRFProgram; // what to run
190  TString fModelType; // model type string
191 
192  mutable MsgLogger fLogger; // message logger
193 
194  ClassDef(RuleFitAPI,0); // Friedman's RuleFit method
195 
196  };
197 
198 } // namespace TMVA
199 
200 //_______________________________________________________________________
201 void TMVA::RuleFitAPI::TrainRuleFit()
202 {
203  // run rf_go.exe to train the model
204  SetTrainParms();
205  WriteAll();
206  RunRuleFit();
207 }
208 
209 //_______________________________________________________________________
210 void TMVA::RuleFitAPI::TestRuleFit()
211 {
212  // run rf_go.exe with the test data
213  SetTestParms();
214  WriteAll();
215  RunRuleFit();
216  ReadYhat(); // read in the scores
217 }
218 
219 //_______________________________________________________________________
220 void TMVA::RuleFitAPI::VarImp()
221 {
222  // run rf_go.exe to get the variable importance
223  SetRFVarimp();
224  WriteAll();
225  RunRuleFit();
226  ReadVarImp(); // read in the variable importances
227 }
228 
229 //_______________________________________________________________________
230 TString TMVA::RuleFitAPI::GetRFName(TString name)
231 {
232  // get the name including the rulefit directory
233  return fRFWorkDir+"/"+name;
234 }
235 
236 //_______________________________________________________________________
237 Bool_t TMVA::RuleFitAPI::OpenRFile(TString name, std::ofstream & f)
238 {
239  // open a file for writing in the rulefit directory
240  TString fullName = GetRFName(name);
241  f.open(fullName);
242  if (!f.is_open()) {
243  fLogger << kERROR << "Error opening RuleFit file for output: "
244  << fullName << Endl;
245  return kFALSE;
246  }
247  return kTRUE;
248 }
249 
250 //_______________________________________________________________________
251 Bool_t TMVA::RuleFitAPI::OpenRFile(TString name, std::ifstream & f)
252 {
253  // open a file for reading in the rulefit directory
254  TString fullName = GetRFName(name);
255  f.open(fullName);
256  if (!f.is_open()) {
257  fLogger << kERROR << "Error opening RuleFit file for input: "
258  << fullName << Endl;
259  return kFALSE;
260  }
261  return kTRUE;
262 }
263 
264 //_______________________________________________________________________
265 Bool_t TMVA::RuleFitAPI::WriteInt(std::ofstream & f, const Int_t *v, Int_t n)
266 {
267  // write an int
268  if (!f.is_open()) return kFALSE;
269  return (Bool_t)f.write(reinterpret_cast<char const *>(v), n*sizeof(Int_t));
270 }
271 
272 //_______________________________________________________________________
273 Bool_t TMVA::RuleFitAPI::WriteFloat(std::ofstream & f, const Float_t *v, Int_t n)
274 {
275  // write a float
276  if (!f.is_open()) return kFALSE;
277  return (Bool_t)f.write(reinterpret_cast<char const *>(v), n*sizeof(Float_t));
278 }
279 
280 //_______________________________________________________________________
281 Int_t TMVA::RuleFitAPI::ReadInt(std::ifstream & f, Int_t *v, Int_t n) const
282 {
283  // read an int
284  if (!f.is_open()) return 0;
285  if (f.read(reinterpret_cast<char *>(v), n*sizeof(Int_t))) return 1;
286  return 0;
287 }
288 
289 //_______________________________________________________________________
290 Int_t TMVA::RuleFitAPI::ReadFloat(std::ifstream & f, Float_t *v, Int_t n) const
291 {
292  // read a float
293  if (!f.is_open()) return 0;
294  if (f.read(reinterpret_cast<char *>(v), n*sizeof(Float_t))) return 1;
295  return 0;
296 }
297 
298 #endif // ROOT_TMVA_RuleFitAPI