46 Bool_t MethodRSVM::IsModuleLoaded = ROOT::R::TRInterface::Instance().Require("e1071");
// Standard (training) constructor: books the RSVM method and sets the
// default option values.
// NOTE(review): this chunk is extraction-damaged — the leading decimal
// tokens are residual source line numbers, and several original lines are
// missing from this view (the `DataSetInfo &dsi` parameter line, the other
// member initializers, the remaining option defaults, and the braces).
// Do not treat the visible text as compilable; restore from the original
// file before editing logic.
50 MethodRSVM::MethodRSVM(const TString &jobName,
51 const TString &methodTitle,
53 const TString &theOption) :
54 RMethodBase(jobName, Types::kRSVM, methodTitle, dsi, theOption),
// asfactor wraps R's as.factor(), used in Train() to turn the class
// labels into an R factor.
58 asfactor("as.factor"),
// Default SVM type: two-class C-classification (see HasAnalysisType).
64 fType =
"C-classification";
// Default gamma follows e1071's convention: 1 / (number of input columns),
// with a guard for the single-variable case.
68 fGamma = (fDfTrain.GetNcols() == 1) ? 1.0 : (1.0 / fDfTrain.GetNcols());
// The training constructor defaults to no probability model.
77 fProbability = kFALSE;
// Constructor used when instantiating the method from a weight file
// (application phase).
// NOTE(review): extraction-damaged like the constructor above — member
// initializers, the remaining option defaults, and braces are missing from
// this view; the leading decimal tokens are residual line numbers.
82 MethodRSVM::MethodRSVM(DataSetInfo &theData,
const TString &theWeightFile)
83 : RMethodBase(Types::kRSVM, theData, theWeightFile),
// R's as.factor(), used to build the factor of class labels.
87 asfactor(
"as.factor"),
// Same defaults as the training constructor ...
93 fType =
"C-classification";
97 fGamma = (fDfTrain.GetNcols() == 1) ? 1.0 : (1.0 / fDfTrain.GetNcols());
// ... except that probability output is enabled here (kFALSE in the
// training constructor) — presumably so a restored model can serve
// probability predictions; confirm against the original file.
106 fProbability = kTRUE;
112 MethodRSVM::~MethodRSVM(
void)
114 if (fModel)
delete fModel;
118 Bool_t MethodRSVM::HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t )
120 if (type == Types::kClassification && numberClasses == 2)
return kTRUE;
126 void MethodRSVM::Init()
128 if (!IsModuleLoaded) {
129 Error(
"Init",
"R's package e1071 can not be loaded.");
130 Log() << kFATAL <<
" R's package e1071 can not be loaded."
// Train the SVM: build the class-weight frame, call R's svm() on the
// training data frame, keep the resulting model, and optionally persist it
// as an .RData weight file.
// NOTE(review): extraction-damaged — leading decimal tokens are residual
// line numbers; the `Log() << kINFO` statement that heads the option dump
// (original line 144), braces and several structural lines are missing
// from this view.
136 void MethodRSVM::Train()
// Guard: cannot train on an empty dataset (kFATAL aborts).
138 if (Data()->GetNTrainingEvents() == 0) Log() << kFATAL <<
"<Train> Data() has zero events" << Endl;
// Class weights passed to svm(): one entry per class, set to the number
// of training events of that class.
140 ROOT::R::TRDataFrame ClassWeightsTrain;
141 ClassWeightsTrain[
"background"] = Data()->GetNEvtBkgdTrain();
142 ClassWeightsTrain[
"signal"] = Data()->GetNEvtSigTrain();
// Tail of the (truncated) option-dump log statement.
145 <<
" Probability is " << fProbability
146 <<
" Tolerance is " << fTolerance
147 <<
" Type is " << fType
// Invoke R's svm() with every booked option forwarded as a named argument;
// the labels are passed through as.factor() so R treats this as
// classification.
151 SEXP Model = svm(ROOT::R::Label[
"x"] = fDfTrain, \
152 ROOT::R::Label[
"y"] = asfactor(fFactorTrain), \
153 ROOT::R::Label[
"scale"] = fScale, \
154 ROOT::R::Label[
"type"] = fType, \
155 ROOT::R::Label[
"kernel"] = fKernel, \
156 ROOT::R::Label[
"degree"] = fDegree, \
157 ROOT::R::Label[
"gamma"] = fGamma, \
158 ROOT::R::Label[
"coef0"] = fCoef0, \
159 ROOT::R::Label[
"cost"] = fCost, \
160 ROOT::R::Label[
"nu"] = fNu, \
161 ROOT::R::Label[
"class.weights"] = ClassWeightsTrain, \
162 ROOT::R::Label[
"cachesize"] = fCacheSize, \
163 ROOT::R::Label[
"tolerance"] = fTolerance, \
164 ROOT::R::Label[
"epsilon"] = fEpsilon, \
165 ROOT::R::Label[
"shrinking"] = fShrinking, \
166 ROOT::R::Label[
"cross"] = fCross, \
167 ROOT::R::Label[
"probability"] = fProbability, \
168 ROOT::R::Label[
"fitted"] = fFitted);
// Keep an owning copy of the R model; released in the destructor.
169 fModel =
new ROOT::R::TRObject(Model);
// Persist the model as <weightdir>/<name>.RData via R's save().
170 if (IsModelPersistence())
172 TString path = GetWeightFileDir() +
"/" + GetName() +
".RData";
174 Log() << gTools().Color(
"bold") <<
"--- Saving State File In:" << gTools().Color(
"reset") << path << Endl;
// Export the model into the R session under "RSVMModel", then save it.
176 r[
"RSVMModel"] << Model;
177 r <<
"save(RSVMModel,file='" + path +
"')";
// Declare every user-configurable option of the method; the help strings
// are taken from R's e1071::svm documentation.
// NOTE(review): extraction-damaged — leading decimal tokens are residual
// line numbers, braces are missing, and several option help strings have
// lost continuation lines (e.g. the "Type" option's string is left
// unterminated in this view). Restore from the original file before
// editing.
182 void MethodRSVM::DeclareOptions()
184 DeclareOptionRef(fScale,
"Scale",
"A logical vector indicating the variables to be scaled. If\
185 ‘scale’ is of length 1, the value is recycled as many times \
186 as needed. Per default, data are scaled internally (both ‘x’\
187 and ‘y’ variables) to zero mean and unit variance. The center \
188 and scale values are returned and used for later predictions.");
189 DeclareOptionRef(fType,
"Type",
"‘svm’ can be used as a classification machine, as a \
190 regression machine, or for novelty detection. Depending of\
191 whether ‘y’ is a factor or not, the default setting for\
192 ‘type’ is ‘C-classification’ or ‘eps-regression’,\
193 respectively, but may be overwritten by setting an explicit value.\
195 - ‘C-classification’\
196 - ‘nu-classification’\
197 - ‘one-classification’ (for novelty detection)\
200 DeclareOptionRef(fKernel,
"Kernel",
"the kernel used in training and predicting. You might\
201 consider changing some of the following parameters, depending on the kernel type.\
203 polynomial: (gamma*u'*v + coef0)^degree\
204 radial basis: exp(-gamma*|u-v|^2)\
205 sigmoid: tanh(gamma*u'*v + coef0)");
206 DeclareOptionRef(fDegree,
"Degree",
"parameter needed for kernel of type ‘polynomial’ (default: 3)");
207 DeclareOptionRef(fGamma,
"Gamma",
"parameter needed for all kernels except ‘linear’ (default:1/(data dimension))");
208 DeclareOptionRef(fCoef0,
"Coef0",
"parameter needed for kernels of type ‘polynomial’ and ‘sigmoid’ (default: 0)");
209 DeclareOptionRef(fCost,
"Cost",
"cost of constraints violation (default: 1)-it is the ‘C’-constant of the regularization term in the Lagrange formulation.");
210 DeclareOptionRef(fNu,
"Nu",
"parameter needed for ‘nu-classification’, ‘nu-regression’,and ‘one-classification’");
211 DeclareOptionRef(fCacheSize,
"CacheSize",
"cache memory in MB (default 40)");
212 DeclareOptionRef(fTolerance,
"Tolerance",
"tolerance of termination criterion (default: 0.001)");
213 DeclareOptionRef(fEpsilon,
"Epsilon",
"epsilon in the insensitive-loss function (default: 0.1)");
214 DeclareOptionRef(fShrinking,
"Shrinking",
"option whether to use the shrinking-heuristics (default:‘TRUE’)");
215 DeclareOptionRef(fCross,
"Cross",
"if a integer value k>0 is specified, a k-fold cross validation on the training data is performed to assess the quality of the model: the accuracy rate for classification and the Mean Squared Error for regression");
216 DeclareOptionRef(fProbability,
"Probability",
"logical indicating whether the model should allow for probability predictions");
217 DeclareOptionRef(fFitted,
"Fitted",
"logical indicating whether the fitted values should be computed and included in the model or not (default: ‘TRUE’)");
222 void MethodRSVM::ProcessOptions()
224 r[
"RMVA.RSVM.Scale"] = fScale;
225 r[
"RMVA.RSVM.Type"] = fType;
226 r[
"RMVA.RSVM.Kernel"] = fKernel;
227 r[
"RMVA.RSVM.Degree"] = fDegree;
228 r[
"RMVA.RSVM.Gamma"] = fGamma;
229 r[
"RMVA.RSVM.Coef0"] = fCoef0;
230 r[
"RMVA.RSVM.Cost"] = fCost;
231 r[
"RMVA.RSVM.Nu"] = fNu;
232 r[
"RMVA.RSVM.CacheSize"] = fCacheSize;
233 r[
"RMVA.RSVM.Tolerance"] = fTolerance;
234 r[
"RMVA.RSVM.Epsilon"] = fEpsilon;
235 r[
"RMVA.RSVM.Shrinking"] = fShrinking;
236 r[
"RMVA.RSVM.Cross"] = fCross;
237 r[
"RMVA.RSVM.Probability"] = fProbability;
238 r[
"RMVA.RSVM.Fitted"] = fFitted;
243 void MethodRSVM::TestClassification()
245 Log() << kINFO <<
"Testing Classification RSVM METHOD " << Endl;
247 MethodBase::TestClassification();
252 Double_t MethodRSVM::GetMvaValue(Double_t *errLower, Double_t *errUpper)
254 NoErrorCalc(errLower, errUpper);
256 const TMVA::Event *ev = GetEvent();
257 const UInt_t nvar = DataInfo().GetNVariables();
258 ROOT::R::TRDataFrame fDfEvent;
259 for (UInt_t i = 0; i < nvar; i++) {
260 fDfEvent[DataInfo().GetListOfVariables()[i].Data()] = ev->GetValues()[i];
263 if (IsModelPersistence()) ReadStateFromFile();
265 ROOT::R::TRObject result = predict(*fModel, fDfEvent, ROOT::R::Label[
"decision.values"] = kTRUE, ROOT::R::Label[
"probability"] = kTRUE);
266 TVectorD values = result.GetAttribute(
"decision.values");
267 mvaValue = values[0];
// Batch evaluation: predict MVA values for events [firstEvt, lastEvt) in a
// single call into R, using class probabilities when available and falling
// back to decision values otherwise.
// NOTE(review): extraction-damaged — the `probSize` declaration, the
// if/else structure around probability availability, the `if (logProgress)`
// guard, the final `return mvaValues;` and all braces are missing from this
// view; leading decimal tokens are residual line numbers.
273 std::vector<Double_t> MethodRSVM::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
// Clamp the requested event range to what the dataset actually holds.
275 Long64_t nEvents = Data()->GetNEvents();
276 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
277 if (firstEvt < 0) firstEvt = 0;
279 nEvents = lastEvt-firstEvt;
281 UInt_t nvars = Data()->GetNVariables();
284 Timer timer( nEvents, GetName(), kTRUE );
286 Log() << kINFO<<Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Evaluation of " << GetMethodName() <<
" on "
287 << (Data()->GetCurrentType()==Types::kTraining?
"training":
"testing") <<
" sample (" << nEvents <<
" events)" << Endl;
// Gather input variables column-wise (one vector per variable) for the
// R data frame.
291 std::vector<std::vector<Float_t> > inputData(nvars);
292 for (UInt_t i = 0; i < nvars; i++) {
293 inputData[i] = std::vector<Float_t>(nEvents);
296 for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
297 Data()->SetCurrentEvent(ievt);
298 const TMVA::Event *e = Data()->GetEvent();
299 assert(nvars == e->GetNVariables());
300 for (UInt_t i = 0; i < nvars; i++) {
// NOTE(review): inputData[i] has size nEvents = lastEvt-firstEvt but is
// indexed with the absolute event index ievt — out of bounds whenever
// firstEvt > 0. Should be inputData[i][ievt - firstEvt]; confirm and fix
// against the original file.
301 inputData[i][ievt] = e->GetValue(i);
307 ROOT::R::TRDataFrame evtData;
308 for (UInt_t i = 0; i < nvars; i++) {
309 evtData[DataInfo().GetListOfVariables()[i].Data()] = inputData[i];
// Lazily restore the trained model when running from a weight file.
312 if (IsModelPersistence()) ReadModelFromFile();
314 std::vector<Double_t> mvaValues(nEvents);
// One predict() call for the whole batch.
317 ROOT::R::TRObject result = predict(*fModel, evtData, ROOT::R::Label[
"decision.values"] = kTRUE, ROOT::R::Label[
"probability"] = kTRUE);
// Probe on the R side whether a "probabilities" attribute was produced
// (only present when the model was trained with probability = TRUE).
319 r[
"result"] << result;
320 r <<
"v2 <- attr(result, \"probabilities\") ";
322 r[
"length(v2)"] >> probSize;
325 std::vector<Double_t> probValues = result.GetAttribute(
"probabilities");
// The probability matrix is flattened column-wise: 2 columns x nEvents.
327 assert(probValues.size() == 2*mvaValues.size());
328 for (
int i = 0; i < nEvents; ++i)
// Take the second column; presumably the signal-class probability —
// verify the column/class ordering against e1071's output.
331 mvaValues[i] = probValues[nEvents+i];
// Fallback branch: no probability model available, use decision values.
336 Log() << kINFO <<
" : Probabilities are not available. Use decision values instead !" << Endl;
338 std::vector<Double_t> probValues = result.GetAttribute(
"decision.values");
339 mvaValues = probValues;
347 Log() << kINFO <<Form(
"Dataset[%s] : ",DataInfo().GetName())<<
"Elapsed time for evaluation of " << nEvents <<
" events: "
348 << timer.GetElapsedTime() <<
" " << Endl;
356 void TMVA::MethodRSVM::ReadModelFromFile()
358 ROOT::R::TRInterface::Instance().Require(
"e1071");
359 TString path = GetWeightFileDir() +
"/" + GetName() +
".RData";
361 Log() << gTools().Color(
"bold") <<
"--- Loading State File From:" << gTools().Color(
"reset") << path << Endl;
363 r <<
"load('" + path +
"')";
365 r[
"RSVMModel"] >> Model;
366 fModel =
new ROOT::R::TRObject(Model);
371 void MethodRSVM::GetHelpMessage()
const
378 Log() << gTools().Color(
"bold") <<
"--- Short description:" << gTools().Color(
"reset") << Endl;
380 Log() <<
"Decision Trees and Rule-Based Models " << Endl;
382 Log() << gTools().Color(
"bold") <<
"--- Performance optimisation:" << gTools().Color(
"reset") << Endl;
385 Log() << gTools().Color(
"bold") <<
"--- Performance tuning via configuration options:" << gTools().Color(
"reset") << Endl;
387 Log() <<
"<None>" << Endl;