51 Bool_t gFIRSTTST=kTRUE;
52 Bool_t gFIRSTORG=kTRUE;
58 Double_t gGDEvalRule=0;
59 Double_t gGDRuleLoop=0;
60 Double_t gGDLinLoop=0;
65 TMVA::RuleFitParams::RuleFitParams()
81 , fGDPathStep ( 0.01 )
82 , fGDNPathSteps ( 1000 )
98 , fLogger( new MsgLogger(
"RuleFit") )
105 TMVA::RuleFitParams::~RuleFitParams()
107 if (fNTCoeff) {
delete fNTCoeff; fNTCoeff = 0; }
108 if (fNTLinCoeff) {
delete fNTLinCoeff;fNTLinCoeff = 0; }
115 void TMVA::RuleFitParams::Init()
117 if (fRuleFit==0)
return;
118 if (fRuleFit->GetMethodRuleFit()==0) {
119 Log() << kFATAL <<
"RuleFitParams::Init() - MethodRuleFit ptr is null" << Endl;
121 UInt_t neve = fRuleFit->GetTrainingEvents().size();
123 fRuleEnsemble = fRuleFit->GetRuleEnsemblePtr();
124 fNRules = fRuleEnsemble->GetNRules();
125 fNLinear = fRuleEnsemble->GetNLinear();
134 fPerfIdx2 =
static_cast<UInt_t
>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDValidEveFrac());
139 ofs = neve - fPerfIdx2 - 1;
149 fPathIdx2 =
static_cast<UInt_t
>((neve-1)*fRuleFit->GetMethodRuleFit()->GetGDPathEveFrac());
158 for (UInt_t ie=fPathIdx1; ie<fPathIdx2+1; ie++) {
159 fNEveEffPath += fRuleFit->GetTrainingEventWeight(ie);
163 for (UInt_t ie=fPerfIdx1; ie<fPerfIdx2+1; ie++) {
164 fNEveEffPerf += fRuleFit->GetTrainingEventWeight(ie);
167 Log() << kVERBOSE <<
"Path constr. - event index range = [ " << fPathIdx1 <<
", " << fPathIdx2 <<
" ]"
168 <<
", effective N(events) = " << fNEveEffPath << Endl;
169 Log() << kVERBOSE <<
"Error estim. - event index range = [ " << fPerfIdx1 <<
", " << fPerfIdx2 <<
" ]"
170 <<
", effective N(events) = " << fNEveEffPerf << Endl;
172 if (fRuleEnsemble->DoRules())
173 Log() << kDEBUG <<
"Number of rules in ensemble: " << fNRules << Endl;
175 Log() << kDEBUG <<
"Rules are disabled " << Endl;
177 if (fRuleEnsemble->DoLinear())
178 Log() << kDEBUG <<
"Number of linear terms: " << fNLinear << Endl;
180 Log() << kDEBUG <<
"Linear terms are disabled " << Endl;
186 void TMVA::RuleFitParams::InitNtuple()
188 fGDNtuple=
new TTree(
"MonitorNtuple_RuleFitParams",
"RuleFit path search");
189 fGDNtuple->Branch(
"risk", &fNTRisk,
"risk/D");
190 fGDNtuple->Branch(
"error", &fNTErrorRate,
"error/D");
191 fGDNtuple->Branch(
"nuval", &fNTNuval,
"nuval/D");
192 fGDNtuple->Branch(
"coefrad", &fNTCoefRad,
"coefrad/D");
193 fGDNtuple->Branch(
"offset", &fNTOffset,
"offset/D");
195 fNTCoeff = (fNRules >0 ?
new Double_t[fNRules] : 0);
196 fNTLinCoeff = (fNLinear>0 ?
new Double_t[fNLinear] : 0);
198 for (UInt_t i=0; i<fNRules; i++) {
199 fGDNtuple->Branch(Form(
"a%d",i+1),&fNTCoeff[i],Form(
"a%d/D",i+1));
201 for (UInt_t i=0; i<fNLinear; i++) {
202 fGDNtuple->Branch(Form(
"b%d",i+1),&fNTLinCoeff[i],Form(
"b%d/D",i+1));
209 void TMVA::RuleFitParams::EvaluateAverage( UInt_t ind1, UInt_t ind2,
210 std::vector<Double_t> &avsel,
211 std::vector<Double_t> &avrul )
213 UInt_t neve = ind2-ind1+1;
215 Log() << kFATAL <<
"<EvaluateAverage> - no events selected for path search -> BUG!" << Endl;
221 if (fNLinear>0) avsel.resize(fNLinear,0);
222 if (fNRules>0) avrul.resize(fNRules,0);
223 const std::vector<UInt_t> *eventRuleMap=0;
229 if (fRuleEnsemble->IsRuleMapOK()) {
230 for ( UInt_t i=ind1; i<ind2+1; i++) {
231 ew = fRuleFit->GetTrainingEventWeight(i);
233 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
234 avsel[sel] += ew*fRuleEnsemble->EvalLinEvent(i,sel);
238 if (fRuleEnsemble->DoRules()) {
239 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
240 nrules = (*eventRuleMap).size();
242 for (UInt_t r=0; r<nrules; r++) {
243 avrul[(*eventRuleMap)[r]] += ew;
248 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
249 for ( UInt_t i=ind1; i<ind2+1; i++) {
250 ew = fRuleFit->GetTrainingEventWeight(i);
253 fRuleEnsemble->EvalLinEvent(*((*events)[i]));
254 fRuleEnsemble->EvalEvent(*((*events)[i]));
256 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
257 avsel[sel] += ew*fRuleEnsemble->GetEventLinearValNorm(sel);
260 for (UInt_t r=0; r<fNRules; r++) {
261 avrul[r] += ew*fRuleEnsemble->GetEventRuleVal(r);
266 for ( UInt_t sel=0; sel<fNLinear; sel++ ) {
267 avsel[sel] = avsel[sel] / sumew;
270 for (UInt_t r=0; r<fNRules; r++) {
271 avrul[r] = avrul[r] / sumew;
279 Double_t TMVA::RuleFitParams::LossFunction(
const Event& e )
const
281 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( e )) );
282 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)?1:-1) - h;
284 return diff*diff*e.GetWeight();
291 Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx )
const
293 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,fRuleEnsemble->EvalEvent( evtidx )) );
294 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
296 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
303 Double_t TMVA::RuleFitParams::LossFunction( UInt_t evtidx, UInt_t itau )
const
305 Double_t e = fRuleEnsemble->EvalEvent( evtidx , fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau]);
306 Double_t h = TMath::Max( -1.0, TMath::Min(1.0,e) );
307 Double_t diff = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(fRuleEnsemble->GetRuleMapEvent( evtidx ))?1:-1) - h;
309 return diff*diff*fRuleFit->GetTrainingEventWeight(evtidx);
315 Double_t TMVA::RuleFitParams::Risk(UInt_t ind1,UInt_t ind2, Double_t neff)
const
317 UInt_t neve = ind2-ind1+1;
319 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" << Endl;
324 for ( UInt_t i=ind1; i<ind2+1; i++) {
325 rval += LossFunction(i);
335 Double_t TMVA::RuleFitParams::Risk(UInt_t ind1,UInt_t ind2, Double_t neff, UInt_t itau)
const
337 UInt_t neve = ind2-ind1+1;
339 Log() << kFATAL <<
"<Risk> Invalid start/end indices! BUG!!!" << Endl;
344 for ( UInt_t i=ind1; i<ind2+1; i++) {
345 rval += LossFunction(i,itau);
357 Double_t TMVA::RuleFitParams::Penalty()
const
359 Log() << kWARNING <<
"<Penalty> Using unverified code! Check!" << Endl;
361 const std::vector<Double_t> *lincoeff = & (fRuleEnsemble->GetLinCoefficients());
362 for (UInt_t i=0; i<fNRules; i++) {
363 rval += TMath::Abs(fRuleEnsemble->GetRules(i)->GetCoefficient());
365 for (UInt_t i=0; i<fNLinear; i++) {
366 rval += TMath::Abs((*lincoeff)[i]);
374 void TMVA::RuleFitParams::InitGD()
392 fGDTauVec.resize( fGDNTau );
394 fGDTauVec[0] = fGDTau;
398 Double_t dtau = (fGDTauMax - fGDTauMin)/static_cast<Double_t>(fGDNTau-1);
399 for (UInt_t itau=0; itau<fGDNTau; itau++) {
400 fGDTauVec[itau] =
static_cast<Double_t
>(itau)*dtau + fGDTauMin;
401 if (fGDTauVec[itau]>1.0) fGDTauVec[itau]=1.0;
409 fGradVecLinTst.clear();
414 fGDCoefLinTst.clear();
418 fGDCoefTst.resize(fGDNTau);
419 fGradVec.resize(fNRules,0);
420 fGradVecTst.resize(fGDNTau);
421 for (UInt_t i=0; i<fGDNTau; i++) {
422 fGradVecTst[i].resize(fNRules,0);
423 fGDCoefTst[i].resize(fNRules,0);
428 fGDCoefLinTst.resize(fGDNTau);
429 fGradVecLin.resize(fNLinear,0);
430 fGradVecLinTst.resize(fGDNTau);
431 for (UInt_t i=0; i<fGDNTau; i++) {
432 fGradVecLinTst[i].resize(fNLinear,0);
433 fGDCoefLinTst[i].resize(fNLinear,0);
438 fGDErrTst.resize(fGDNTau,0);
439 fGDErrTstOK.resize(fGDNTau,kTRUE);
440 fGDOfsTst.resize(fGDNTau,0);
441 fGDNTauTstOK = fGDNTau;
450 Int_t TMVA::RuleFitParams::FindGDTau()
452 if (fGDNTau<2)
return 0;
453 if (fGDTauScan==0)
return 0;
455 if (fGDOfsTst.size()<1)
456 Log() << kFATAL <<
"BUG! FindGDTau() has been called BEFORE InitGD()." << Endl;
458 Log() << kINFO <<
"Estimating the cutoff parameter tau. The estimated time is a pessimistic maximum." << Endl;
461 UInt_t nscan = fGDTauScan;
462 UInt_t netst = std::min(nscan,UInt_t(100));
478 Timer timer( nscan,
"RuleFit" );
481 MakeTstGradientVector();
483 UpdateTstCoefficients();
487 if ( (ip==0) || ((ip+1)%netst==0) ) {
489 itauMin = RiskPerfTst();
490 Log() << kVERBOSE << Form(
"%4d",ip+1) <<
". tau = " << Form(
"%4.4f",fGDTauVec[itauMin])
491 <<
" => error rate = " << fGDErrTst[itauMin] << Endl;
494 doloop = ((ip<nscan) && (fGDNTauTstOK>3));
496 if (Log().GetMinType()>kVERBOSE)
497 timer.DrawProgressBar(ip);
504 Log() << kERROR <<
"<FindGDTau> number of scanned loops is zero! Should NOT see this message." << Endl;
506 fGDTau = fGDTauVec[itauMin];
507 fRuleEnsemble->SetCoefficients( fGDCoefTst[itauMin] );
508 fRuleEnsemble->SetLinCoefficients( fGDCoefLinTst[itauMin] );
509 fRuleEnsemble->SetOffset( fGDOfsTst[itauMin] );
510 Log() << kINFO <<
"Best path found with tau = " << Form(
"%4.4f",fGDTau)
511 <<
" after " << timer.GetElapsedTime() <<
" " << Endl;
539 void TMVA::RuleFitParams::MakeGDPath()
541 Log() << kINFO <<
"GD path scan - the scan stops when the max num. of steps is reached or a min is found"
543 Log() << kVERBOSE <<
"Number of events used per path step = " << fPathIdx2-fPathIdx1+1 << Endl;
544 Log() << kVERBOSE <<
"Number of events used for error estimation = " << fPerfIdx2-fPerfIdx1+1 << Endl;
547 const Bool_t isVerbose = (Log().GetMinType()<=kVERBOSE);
548 const Bool_t isDebug = (Log().GetMinType()<=kDEBUG);
554 EvaluateAveragePath();
555 EvaluateAveragePerf();
558 Log() << kVERBOSE <<
"Creating GD path" << Endl;
559 Log() << kVERBOSE <<
" N(steps) = " << fGDNPathSteps << Endl;
560 Log() << kVERBOSE <<
" step = " << fGDPathStep << Endl;
561 Log() << kVERBOSE <<
" N(tau) = " << fGDNTau << Endl;
562 Log() << kVERBOSE <<
" N(tau steps) = " << fGDTauScan << Endl;
563 Log() << kVERBOSE <<
" tau range = [ " << fGDTauVec[0] <<
" , " << fGDTauVec[fGDNTau-1] <<
" ]" << Endl;
566 if (isDebug) InitNtuple();
575 Double_t errmin=1e32;
578 std::vector<Double_t> coefsMin;
579 std::vector<Double_t> lincoefsMin;
588 Double_t stgradvec=0;
589 Double_t stupgrade=0;
593 const UInt_t npreg=5;
594 std::vector<Double_t> valx;
595 std::vector<Double_t> valy;
596 std::vector<Double_t> valxy;
602 Bool_t riskFlat=kFALSE;
603 Bool_t done = kFALSE;
606 int imod = fGDNPathSteps/100;
607 if (imod<100) imod = std::min(100,fGDNPathSteps);
608 if (imod>100) imod=100;
611 fAverageTruth = -CalcAverageTruth();
612 offsetMin = fAverageTruth;
613 fRuleEnsemble->SetOffset(offsetMin);
614 fRuleEnsemble->ClearCoefficients(0);
615 fRuleEnsemble->ClearLinCoefficients(0);
616 for (UInt_t i=0; i<fGDOfsTst.size(); i++) {
617 fGDOfsTst[i] = offsetMin;
619 Log() << kVERBOSE <<
"Obtained initial offset = " << offsetMin << Endl;
622 Int_t nprescan = FindGDTau();
637 Int_t stopCondition=0;
639 Log() << kINFO <<
"Fitting model..." << Endl;
641 Timer timer( fGDNPathSteps,
"RuleFit" );
645 if (isVerbose) t0 = clock();
646 MakeGradientVector();
648 tgradvec = Double_t(clock()-t0)/CLOCKS_PER_SEC;
649 stgradvec += tgradvec;
653 if (isVerbose) t0 = clock();
654 UpdateCoefficients();
656 tupgrade = Double_t(clock()-t0)/CLOCKS_PER_SEC;
657 stupgrade += tupgrade;
661 docheck = ((iloop==0) ||((iloop+1)%imod==0));
666 timer.DrawProgressBar(iloop);
667 fNTNuval = Double_t(iloop)*fGDPathStep;
672 if (isDebug) FillCoefficients();
673 fNTCoefRad = fRuleEnsemble->CoefficientRadius();
677 fNTRisk = RiskPath();
678 trisk = Double_t(clock()-t0)/CLOCKS_PER_SEC;
685 if (fNTRisk>=rprev) {
688 Log() <<
"Risk(i+1)>=Risk(i) in path" << Endl;
689 riskFlat=(nbadrisk>3);
691 Log() << kWARNING <<
"Chaotic behaviour of risk evolution" << Endl;
692 Log() <<
"--- STOPPING MINIMISATION ---" << Endl;
693 Log() <<
"This may be OK if minimum is already found" << Endl;
703 if (isVerbose) t0 = clock();
708 Double_t riskPerf = RiskPerf();
713 fNTErrorRate = errroc;
716 tperf = Double_t(clock()-t0)/CLOCKS_PER_SEC;
723 if (fNTErrorRate<=errmin) {
724 errmin = fNTErrorRate;
727 fRuleEnsemble->GetCoefficients(coefsMin);
728 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
729 offsetMin = fRuleEnsemble->GetOffset();
731 if ( fNTErrorRate > fGDErrScale*errmin) found = kTRUE;
735 if (valx.size()==npreg) {
736 valx.erase(valx.begin());
737 valy.erase(valy.begin());
738 valxy.erase(valxy.begin());
740 valx.push_back(fNTNuval);
741 valy.push_back(fNTErrorRate);
742 valxy.push_back(fNTErrorRate*fNTNuval);
747 if (isDebug) fGDNtuple->Fill();
749 Log() << kVERBOSE <<
"ParamsIRE : "
751 << Form(
"%8d",iloop+1) <<
" "
752 << Form(
"%4.4f",fNTRisk) <<
" "
753 << Form(
"%4.4f",riskPerf) <<
" "
754 << Form(
"%4.4f",fNTRisk+riskPerf) <<
" "
770 Bool_t endOfLoop = (iloop==fGDNPathSteps);
771 if ( ((riskFlat) || (endOfLoop)) && (!found) ) {
775 else if (endOfLoop) {
779 Log() << kWARNING <<
"BUG TRAP: should not be here - still, this bug is harmless;)" << Endl;
780 errmin = fNTErrorRate;
783 fRuleEnsemble->GetCoefficients(coefsMin);
784 lincoefsMin = fRuleEnsemble->GetLinCoefficients();
785 offsetMin = fRuleEnsemble->GetOffset();
792 Log() << kINFO <<
"Minimisation elapsed time : " << timer.GetElapsedTime() <<
" " << Endl;
793 Log() << kINFO <<
"----------------------------------------------------------------" << Endl;
794 Log() << kINFO <<
"Found minimum at step " << indMin+1 <<
" with error = " << errmin << Endl;
795 Log() << kINFO <<
"Reason for ending loop: ";
796 switch (stopCondition) {
798 Log() << kINFO <<
"clear minima found";
801 Log() << kINFO <<
"chaotic behaviour of risk";
804 Log() << kINFO <<
"end of loop reached";
807 Log() << kINFO <<
"unknown!";
811 Log() << kINFO <<
"----------------------------------------------------------------" << Endl;
814 if ( Double_t(indMin)/Double_t(nprescan+fGDNPathSteps) < 0.05 ) {
815 Log() << kWARNING <<
"Reached minimum early in the search" << Endl;
816 Log() <<
"Check results and maybe decrease GDStep size" << Endl;
821 Double_t sumx = std::accumulate( valx.begin(), valx.end(), Double_t() );
822 Double_t sumxy = std::accumulate( valxy.begin(), valxy.end(), Double_t() );
823 Double_t sumy = std::accumulate( valy.begin(), valy.end(), Double_t() );
824 Double_t slope = Double_t(valx.size())*sumxy - sumx*sumy;
826 Log() << kINFO <<
"The error rate was still decreasing at the end of the path" << Endl;
827 Log() << kINFO <<
"Increase number of steps (GDNSteps)." << Endl;
833 fRuleEnsemble->SetCoefficients( coefsMin );
834 fRuleEnsemble->SetLinCoefficients( lincoefsMin );
835 fRuleEnsemble->SetOffset( offsetMin );
838 Log() << kFATAL <<
"BUG TRAP: minimum not found in MakeGDPath()" << Endl;
845 Double_t stloop = strisk +stupgrade + stgradvec + stperf;
846 Log() << kVERBOSE <<
"Timing per loop (ms):" << Endl;
847 Log() << kVERBOSE <<
" gradvec = " << 1000*stgradvec/iloop << Endl;
848 Log() << kVERBOSE <<
" upgrade = " << 1000*stupgrade/iloop << Endl;
849 Log() << kVERBOSE <<
" risk = " << 1000*strisk/iloop << Endl;
850 Log() << kVERBOSE <<
" perf = " << 1000*stperf/iloop << Endl;
851 Log() << kVERBOSE <<
" loop = " << 1000*stloop/iloop << Endl;
853 Log() << kVERBOSE <<
" GDInit = " << 1000*gGDInit/iloop << Endl;
854 Log() << kVERBOSE <<
" GDPtr = " << 1000*gGDPtr/iloop << Endl;
855 Log() << kVERBOSE <<
" GDEval = " << 1000*gGDEval/iloop << Endl;
856 Log() << kVERBOSE <<
" GDEvalRule = " << 1000*gGDEvalRule/iloop << Endl;
857 Log() << kVERBOSE <<
" GDNorm = " << 1000*gGDNorm/iloop << Endl;
858 Log() << kVERBOSE <<
" GDRuleLoop = " << 1000*gGDRuleLoop/iloop << Endl;
859 Log() << kVERBOSE <<
" GDLinLoop = " << 1000*gGDLinLoop/iloop << Endl;
863 if (isDebug) fGDNtuple->Write();
869 void TMVA::RuleFitParams::FillCoefficients()
871 fNTOffset = fRuleEnsemble->GetOffset();
873 for (UInt_t i=0; i<fNRules; i++) {
874 fNTCoeff[i] = fRuleEnsemble->GetRules(i)->GetCoefficient();
876 for (UInt_t i=0; i<fNLinear; i++) {
877 fNTLinCoeff[i] = fRuleEnsemble->GetLinCoefficients(i);
886 void TMVA::RuleFitParams::CalcFStar()
888 Log() << kWARNING <<
"<CalcFStar> Using unverified code! Check!" << Endl;
889 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
891 Log() << kFATAL <<
"<CalcFStar> Invalid start/end indices!" << Endl;
895 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
898 std::vector<Double_t> fstarSorted;
901 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
902 const Event& e = *(*events)[i];
903 fstarVal = fRuleEnsemble->FStar(e);
904 fFstar.push_back(fstarVal);
905 fstarSorted.push_back(fstarVal);
906 if (TMath::IsNaN(fstarVal)) Log() << kFATAL <<
"F* is NAN!" << Endl;
909 std::sort( fstarSorted.begin(), fstarSorted.end() );
912 fFstarMedian = 0.5*(fstarSorted[ind]+fstarSorted[ind-1]);
915 fFstarMedian = fstarSorted[ind];
926 Double_t TMVA::RuleFitParams::Optimism()
928 Log() << kWARNING <<
"<Optimism> Using unverified code! Check!" << Endl;
929 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
931 Log() << kFATAL <<
"<Optimism> Invalid start/end indices!" << Endl;
934 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
945 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
946 const Event& e = *(*events)[i];
947 yhat = fRuleEnsemble->EvalEvent(i);
948 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? 1.0:-1.0);
949 w = fRuleFit->GetTrainingEventWeight(i)/fNEveEffPerf;
952 sumyhaty += w*yhat*y;
956 Double_t div = 1.0-sumw2;
957 Double_t cov = sumyhaty - sumyhat*sumy;
967 Double_t TMVA::RuleFitParams::ErrorRateReg()
969 Log() << kWARNING <<
"<ErrorRateReg> Using unverified code! Check!" << Endl;
970 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
972 Log() << kFATAL <<
"<ErrorRateReg> Invalid start/end indices!" << Endl;
974 if (fFstar.size()!=neve) {
975 Log() << kFATAL <<
"--- RuleFitParams::ErrorRateReg() - F* not initialized! BUG!!!"
976 <<
" Fstar.size() = " << fFstar.size() <<
" , N(events) = " << neve << Endl;
981 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
984 Double_t sumdfmed = 0;
990 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
991 const Event& e = *(*events)[i];
992 sF = fRuleEnsemble->EvalEvent( e );
994 sumdf += TMath::Abs(fFstar[i-fPerfIdx1] - sF);
995 sumdfmed += TMath::Abs(fFstar[i-fPerfIdx1] - fFstarMedian);
1000 return sumdf/sumdfmed;
1011 Double_t TMVA::RuleFitParams::ErrorRateBin()
1013 Log() << kWARNING <<
"<ErrorRateBin> Using unverified code! Check!" << Endl;
1014 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1016 Log() << kFATAL <<
"<ErrorRateBin> Invalid start/end indices!" << Endl;
1019 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1021 Double_t sumdfbin = 0;
1022 Double_t dneve = Double_t(neve);
1026 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1027 const Event& e = *(*events)[i];
1028 sF = fRuleEnsemble->EvalEvent( e );
1030 signF = (sF>0 ? +1:-1);
1032 signy = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e) ? +1:-1);
1033 sumdfbin += TMath::Abs(Double_t(signF-signy))*0.5;
1035 Double_t f = sumdfbin/dneve;
1045 Double_t TMVA::RuleFitParams::ErrorRateRocRaw( std::vector<Double_t> & sFsig,
1046 std::vector<Double_t> & sFbkg )
1049 std::sort(sFsig.begin(), sFsig.end());
1050 std::sort(sFbkg.begin(), sFbkg.end());
1051 const Double_t minsig = sFsig.front();
1052 const Double_t minbkg = sFbkg.front();
1053 const Double_t maxsig = sFsig.back();
1054 const Double_t maxbkg = sFbkg.back();
1055 const Double_t minf = std::min(minsig,minbkg);
1056 const Double_t maxf = std::max(maxsig,maxbkg);
1057 const Int_t nsig = Int_t(sFsig.size());
1058 const Int_t nbkg = Int_t(sFbkg.size());
1059 const Int_t np = std::min((nsig+nbkg)/4,50);
1060 const Double_t df = (maxf-minf)/(np-1);
1065 std::vector<Double_t>::const_iterator indit;
1080 for (Int_t i=0; i<np; i++) {
1081 fcut = minf + df*Double_t(i);
1082 indit = std::find_if(sFsig.begin(), sFsig.end(),
1083 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1084 nesig = sFsig.end()-indit;
1085 if (TMath::Abs(pnesig-nesig)>0) {
1087 indit = std::find_if(sFbkg.begin(), sFbkg.end(),
1088 std::bind(std::greater_equal<Double_t>(), std::placeholders::_1, fcut));
1089 nrbkg = indit-sFbkg.begin();
1090 rejb = Double_t(nrbkg)/Double_t(nbkg);
1091 effs = Double_t(nesig)/Double_t(nsig);
1095 area += 0.5*TMath::Abs(deffs)*(rejb+prejb);
1101 area += 0.5*(1+rejb)*effs;
1112 Double_t TMVA::RuleFitParams::ErrorRateRoc()
1114 Log() << kWARNING <<
"<ErrorRateRoc> Should not be used in the current version! Check!" << Endl;
1115 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1117 Log() << kFATAL <<
"<ErrorRateRoc> Invalid start/end indices!" << Endl;
1120 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1124 std::vector<Double_t> sFsig;
1125 std::vector<Double_t> sFbkg;
1128 Double_t sumf2sig=0;
1129 Double_t sumf2bkg=0;
1131 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1132 const Event& e = *(*events)[i];
1133 sF = fRuleEnsemble->EvalEvent(i);
1134 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(&e)) {
1135 sFsig.push_back(sF);
1140 sFbkg.push_back(sF);
1145 fsigave = sumfsig/sFsig.size();
1146 fbkgave = sumfbkg/sFbkg.size();
1147 fsigrms = TMath::Sqrt(gTools().ComputeVariance(sumf2sig,sumfsig,sFsig.size()));
1148 fbkgrms = TMath::Sqrt(gTools().ComputeVariance(sumf2bkg,sumfbkg,sFbkg.size()));
1150 return ErrorRateRocRaw( sFsig, sFbkg );
1160 void TMVA::RuleFitParams::ErrorRateRocTst()
1162 Log() << kWARNING <<
"<ErrorRateRocTst> Should not be used in the current version! Check!" << Endl;
1163 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1165 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" << Endl;
1169 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1173 std::vector< std::vector<Double_t> > sFsig;
1174 std::vector< std::vector<Double_t> > sFbkg;
1176 sFsig.resize( fGDNTau );
1177 sFbkg.resize( fGDNTau );
1180 for (UInt_t i=fPerfIdx1; i<fPerfIdx2+1; i++) {
1181 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1184 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1185 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) {
1186 sFsig[itau].push_back(sF);
1189 sFbkg[itau].push_back(sF);
1195 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1196 err = ErrorRateRocRaw( sFsig[itau], sFbkg[itau] );
1197 fGDErrTst[itau] = err;
1206 UInt_t TMVA::RuleFitParams::RiskPerfTst()
1208 UInt_t neve = fPerfIdx2-fPerfIdx1+1;
1210 Log() << kFATAL <<
"<ErrorRateRocTst> Invalid start/end indices!" << Endl;
1216 Double_t maxx = -100.0;
1217 Double_t minx = 1e30;
1220 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1221 if (fGDErrTstOK[itau]) {
1223 fGDErrTst[itau] = RiskPerf(itau);
1224 sumx += fGDErrTst[itau];
1225 sumx2 += fGDErrTst[itau]*fGDErrTst[itau];
1226 if (fGDErrTst[itau]>maxx) maxx=fGDErrTst[itau];
1227 if (fGDErrTst[itau]<minx) {
1228 minx=fGDErrTst[itau];
1233 Double_t sigx = TMath::Sqrt(gTools().ComputeVariance( sumx2, sumx, nok ) );
1234 Double_t maxacc = minx+sigx;
1238 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1239 if (fGDErrTstOK[itau]) {
1240 if (fGDErrTst[itau] > maxacc) {
1241 fGDErrTstOK[itau] = kFALSE;
1250 Log() << kVERBOSE <<
"TAU: "
1264 void TMVA::RuleFitParams::MakeTstGradientVector()
1266 UInt_t neve = fPathIdx1-fPathIdx2+1;
1268 Log() << kFATAL <<
"<MakeTstGradientVector> Invalid start/end indices!" << Endl;
1272 Double_t norm = 2.0/fNEveEffPath;
1274 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1277 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1278 if (fGDErrTstOK[itau]) {
1279 for (UInt_t ir=0; ir<fNRules; ir++) {
1280 fGradVecTst[itau][ir]=0;
1282 for (UInt_t il=0; il<fNLinear; il++) {
1283 fGradVecLinTst[itau][il]=0;
1292 const std::vector<UInt_t> *eventRuleMap=0;
1298 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1299 const Event *e = (*events)[i];
1301 if (fRuleEnsemble->DoRules()) {
1302 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1303 nrules = (*eventRuleMap).size();
1305 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1308 if (fGDErrTstOK[itau]) {
1309 sF = fRuleEnsemble->EvalEvent( i, fGDOfsTst[itau], fGDCoefTst[itau], fGDCoefLinTst[itau] );
1310 if (TMath::Abs(sF)<1.0) {
1313 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
1314 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
1316 for (UInt_t ir=0; ir<nrules; ir++) {
1317 rind = (*eventRuleMap)[ir];
1318 fGradVecTst[itau][rind] += r;
1321 for (UInt_t il=0; il<fNLinear; il++) {
1322 fGradVecLinTst[itau][il] += r*fRuleEnsemble->EvalLinEventRaw( il,i, kTRUE );
1334 void TMVA::RuleFitParams::UpdateTstCoefficients()
1336 Double_t maxr, maxl, cthresh, val;
1338 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1339 if (fGDErrTstOK[itau]) {
1341 maxr = ( (fNRules>0 ?
1342 TMath::Abs(*(std::max_element( fGradVecTst[itau].begin(), fGradVecTst[itau].end(), AbsValue()))):0) );
1343 maxl = ( (fNLinear>0 ?
1344 TMath::Abs(*(std::max_element( fGradVecLinTst[itau].begin(), fGradVecLinTst[itau].end(), AbsValue()))):0) );
1347 Double_t maxv = (maxr>maxl ? maxr:maxl);
1348 cthresh = maxv * fGDTauVec[itau];
1357 const Double_t stepScale=1.0;
1358 for (UInt_t i=0; i<fNRules; i++) {
1359 val = fGradVecTst[itau][i];
1361 if (TMath::Abs(val)>=cthresh) {
1362 fGDCoefTst[itau][i] += fGDPathStep*val*stepScale;
1366 for (UInt_t i=0; i<fNLinear; i++) {
1367 val = fGradVecLinTst[itau][i];
1368 if (TMath::Abs(val)>=cthresh) {
1369 fGDCoefLinTst[itau][i] += fGDPathStep*val*stepScale/fRuleEnsemble->GetLinNorm(i);
1376 CalcTstAverageResponse();
1382 void TMVA::RuleFitParams::MakeGradientVector()
1388 UInt_t neve = fPathIdx2-fPathIdx1+1;
1390 Log() << kFATAL <<
"<MakeGradientVector> Invalid start/end indices!" << Endl;
1394 const Double_t norm = 2.0/fNEveEffPath;
1396 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1399 for (UInt_t ir=0; ir<fNRules; ir++) {
1402 for (UInt_t il=0; il<fNLinear; il++) {
1410 const std::vector<UInt_t> *eventRuleMap=0;
1413 gGDInit += Double_t(clock()-t0)/CLOCKS_PER_SEC;
1415 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1416 const Event *e = (*events)[i];
1419 sF = fRuleEnsemble->EvalEvent( i );
1421 if (TMath::Abs(sF)<1.0) {
1423 if (fRuleEnsemble->DoRules()) {
1424 eventRuleMap = &(fRuleEnsemble->GetEventRuleMap(i));
1425 nrules = (*eventRuleMap).size();
1427 y = (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e)?1.0:-1.0);
1428 r = norm*(y - sF) * fRuleFit->GetTrainingEventWeight(i);
1430 for (UInt_t ir=0; ir<nrules; ir++) {
1431 rind = (*eventRuleMap)[ir];
1432 fGradVec[rind] += r;
1437 for (UInt_t il=0; il<fNLinear; il++) {
1438 fGradVecLin[il] += r*fRuleEnsemble->EvalLinEventRaw( il, i, kTRUE );
1448 void TMVA::RuleFitParams::UpdateCoefficients()
1450 Double_t maxr = ( (fRuleEnsemble->DoRules() ?
1451 TMath::Abs(*(std::max_element( fGradVec.begin(), fGradVec.end(), AbsValue()))):0) );
1452 Double_t maxl = ( (fRuleEnsemble->DoLinear() ?
1453 TMath::Abs(*(std::max_element( fGradVecLin.begin(), fGradVecLin.end(), AbsValue()))):0) );
1455 Double_t maxv = (maxr>maxl ? maxr:maxl);
1456 Double_t cthresh = maxv * fGDTau;
1458 Double_t useRThresh;
1459 Double_t useLThresh;
1463 useRThresh = cthresh;
1464 useLThresh = cthresh;
1466 Double_t gval, lval, coef, lcoef;
1472 for (UInt_t i=0; i<fGradVec.size(); i++) {
1474 if (TMath::Abs(gval)>=useRThresh) {
1475 coef = fRuleEnsemble->GetRulesConst(i)->GetCoefficient() + fGDPathStep*gval;
1476 fRuleEnsemble->GetRules(i)->SetCoefficient(coef);
1481 for (UInt_t i=0; i<fGradVecLin.size(); i++) {
1482 lval = fGradVecLin[i];
1483 if (TMath::Abs(lval)>=useLThresh) {
1484 lcoef = fRuleEnsemble->GetLinCoefficients(i) + (fGDPathStep*lval/fRuleEnsemble->GetLinNorm(i));
1485 fRuleEnsemble->SetLinCoefficient(i,lcoef);
1489 Double_t offset = CalcAverageResponse();
1490 fRuleEnsemble->SetOffset( offset );
1498 void TMVA::RuleFitParams::CalcTstAverageResponse()
1500 for (UInt_t itau=0; itau<fGDNTau; itau++) {
1501 if (fGDErrTstOK[itau]) {
1502 fGDOfsTst[itau] = 0;
1503 for (UInt_t s=0; s<fNLinear; s++) {
1504 fGDOfsTst[itau] -= fGDCoefLinTst[itau][s] * fAverageSelectorPath[s];
1506 for (UInt_t r=0; r<fNRules; r++) {
1507 fGDOfsTst[itau] -= fGDCoefTst[itau][r] * fAverageRulePath[r];
1519 Double_t TMVA::RuleFitParams::CalcAverageResponse()
1522 for (UInt_t s=0; s<fNLinear; s++) {
1523 ofs -= fRuleEnsemble->GetLinCoefficients(s) * fAverageSelectorPath[s];
1525 for (UInt_t r=0; r<fNRules; r++) {
1526 ofs -= fRuleEnsemble->GetRules(r)->GetCoefficient() * fAverageRulePath[r];
1534 Double_t TMVA::RuleFitParams::CalcAverageTruth()
1536 if (fPathIdx2<=fPathIdx1) {
1537 Log() << kFATAL <<
"<CalcAverageTruth> Invalid start/end indices!" << Endl;
1543 const std::vector<const Event *> *events = &(fRuleFit->GetTrainingEvents());
1544 for (UInt_t i=fPathIdx1; i<fPathIdx2+1; i++) {
1545 Double_t ew = fRuleFit->GetTrainingEventWeight(i);
1546 if (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])) ensig += ew;
1548 sum += ew*(fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal((*events)[i])?1.0:-1.0);
1550 Log() << kVERBOSE <<
"Effective number of signal / background = " << ensig <<
" / " << enbkg << Endl;
1552 return sum/fNEveEffPath;
1557 Int_t TMVA::RuleFitParams::Type(
const Event * e )
const {
1558 return (fRuleFit->GetMethodRuleFit()->DataInfo().IsSignal(e) ? 1:-1);
1563 void TMVA::RuleFitParams::SetMsgType( EMsgType t ) {
1564 fLogger->SetMinType(t);