// Register PDEFoam with the TMVA method factory and provide the ROOT
// dictionary implementation macro for TMVA::MethodPDEFoam.
78 REGISTER_METHOD(PDEFoam)
80 ClassImp(TMVA::MethodPDEFoam);
// Standard training constructor: forwards to MethodBase and default-initializes
// the PDEFoam option members (separate foams off, mean target selection, no
// kernel estimator yet, foam-variance cell splitting).
// NOTE(review): this view is an incomplete extraction — the embedded original
// line numbering skips (e.g. 91-99), so some initializer-list entries and the
// constructor body are not visible here.
85 TMVA::MethodPDEFoam::MethodPDEFoam( const TString& jobName,
86 const TString& methodTitle,
88 const TString& theOption ) :
89 MethodBase( jobName, Types::kPDEFoam, methodTitle, dsi, theOption)
90 , fSigBgSeparated(kFALSE)
100 , fMultiTargetRegression(kFALSE)
106 , fKernelEstimator(NULL)
107 , fTargetSelectionStr("Mean")
108 , fTargetSelection(kMean)
109 , fFillFoamWithOrigWeights(kFALSE)
110 , fUseYesNoCell(kFALSE)
112 , fDTSeparation(kFoam)
// Constructor used when instantiating the method from an existing weight file
// (application phase). Same default member initialization as the training
// constructor.
// NOTE(review): incomplete extraction — initializer entries between the
// visible ones and the body are missing from this view.
123 TMVA::MethodPDEFoam::MethodPDEFoam( DataSetInfo& dsi,
124 const TString& theWeightFile) :
125 MethodBase( Types::kPDEFoam, dsi, theWeightFile)
126 , fSigBgSeparated(kFALSE)
136 , fMultiTargetRegression(kFALSE)
142 , fKernelEstimator(NULL)
143 , fTargetSelectionStr(
"Mean")
144 , fTargetSelection(kMean)
145 , fFillFoamWithOrigWeights(kFALSE)
146 , fUseYesNoCell(kFALSE)
148 , fDTSeparation(kFoam)
// Report which analysis types PDEFoam supports: two-class classification,
// multiclass classification, and regression.
// NOTE(review): the closing "return kFALSE" / brace of this function is not
// visible in this extraction.
160 Bool_t TMVA::MethodPDEFoam::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t )
162 if (type == Types::kClassification && numberClasses == 2)
return kTRUE;
163 if (type == Types::kMulticlass )
return kTRUE;
164 if (type == Types::kRegression)
return kTRUE;
// Default initialization of all tunable members, called before option parsing.
// NOTE(review): extraction gaps — several assignments between the visible
// lines are missing from this view.
171 void TMVA::MethodPDEFoam::Init(
void )
174 fSigBgSeparated = kFALSE;
// Binary cell tree: n active (leaf) cells implies 2n-1 total cells.
179 fnCells = fnActiveCells*2-1;
185 fFillFoamWithOrigWeights = kFALSE;
186 fUseYesNoCell = kFALSE;
188 fDTSeparation = kFoam;
191 fKernelEstimator= NULL;
192 fTargetSelection= kMean;
195 fMultiTargetRegression = kFALSE;
// Two reference-cut settings appear; presumably they belong to different
// branches (e.g. UseYesNoCell on/off) whose conditionals are not visible
// here — TODO confirm against the full source.
200 SetSignalReferenceCut( 0.0 );
202 SetSignalReferenceCut( 0.5 );
// Declare all booking options of the PDEFoam method together with their
// defaults and help strings, and register the allowed string values for the
// enum-like options (DTLogic, Kernel, TargetSelection).
208 void TMVA::MethodPDEFoam::DeclareOptions()
210 DeclareOptionRef( fSigBgSeparated = kFALSE,
"SigBgSeparate",
"Separate foams for signal and background" );
211 DeclareOptionRef( fFrac = 0.001,
"TailCut",
"Fraction of outlier events that are excluded from the foam in each dimension" );
212 DeclareOptionRef( fVolFrac = 1./15.,
"VolFrac",
"Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)");
213 DeclareOptionRef( fnActiveCells = 500,
"nActiveCells",
"Maximum number of active cells to be created by the foam");
214 DeclareOptionRef( fnSampl = 2000,
"nSampl",
"Number of generated MC events per cell");
215 DeclareOptionRef( fnBin = 5,
"nBin",
"Number of bins in edge histograms");
216 DeclareOptionRef( fCompress = kTRUE,
"Compress",
"Compress foam output file");
217 DeclareOptionRef( fMultiTargetRegression = kFALSE,
"MultiTargetRegression",
"Do regression with multiple targets");
218 DeclareOptionRef( fNmin = 100,
"Nmin",
"Number of events in cell required to split cell");
219 DeclareOptionRef( fMaxDepth = 0,
"MaxDepth",
"Maximum depth of cell tree (0=unlimited)");
220 DeclareOptionRef( fFillFoamWithOrigWeights = kFALSE,
"FillFoamWithOrigWeights",
"Fill foam with original or boost weights");
221 DeclareOptionRef( fUseYesNoCell = kFALSE,
"UseYesNoCell",
"Return -1 or 1 for bkg or signal like events");
// Decision-tree-style cell splitting criterion; "None" means plain foam
// variance minimization (see ProcessOptions for the string->enum mapping).
222 DeclareOptionRef( fDTLogic =
"None",
"DTLogic",
"Use decision tree algorithm to split cells");
223 AddPreDefVal(TString(
"None"));
224 AddPreDefVal(TString(
"GiniIndex"));
225 AddPreDefVal(TString(
"MisClassificationError"));
226 AddPreDefVal(TString(
"CrossEntropy"));
227 AddPreDefVal(TString(
"GiniIndexWithLaplace"));
228 AddPreDefVal(TString(
"SdivSqrtSplusB"));
230 DeclareOptionRef( fKernelStr =
"None",
"Kernel",
"Kernel type used");
231 AddPreDefVal(TString(
"None"));
232 AddPreDefVal(TString(
"Gauss"));
233 AddPreDefVal(TString(
"LinNeighbors"));
234 DeclareOptionRef( fTargetSelectionStr =
"Mean",
"TargetSelection",
"Target selection method");
235 AddPreDefVal(TString(
"Mean"));
236 AddPreDefVal(TString(
"Mpv"));
// Declare obsolete options kept only so that old weight files / option strings
// still parse (CutNmin, PeekMax are no longer used by the current code).
243 void TMVA::MethodPDEFoam::DeclareCompatibilityOptions() {
244 MethodBase::DeclareCompatibilityOptions();
245 DeclareOptionRef(fCutNmin = kTRUE,
"CutNmin",
"Requirement for minimal number of events in cell");
246 DeclareOptionRef(fPeekMax = kTRUE,
"PeekMax",
"Peek cell with max. loss for the next split");
// Validate and post-process the parsed options: sanity-check TailCut and
// nActiveCells, map the DTLogic / Kernel / TargetSelection strings onto their
// enums, and reconcile incompatible settings.
// NOTE(review): extraction gaps — some braces and fallback assignments (e.g.
// the TailCut reset to 0.001) are not visible in this view.
252 void TMVA::MethodPDEFoam::ProcessOptions()
254 if (!(fFrac>=0. && fFrac<=1.)) {
255 Log() << kWARNING <<
"TailCut not in [0.,1] ==> using 0.001 instead" << Endl;
259 if (fnActiveCells < 1) {
260 Log() << kWARNING <<
"invalid number of active cells specified: "
261 << fnActiveCells <<
"; setting nActiveCells=2" << Endl;
// Total cells in the binary cell tree = 2*active - 1.
264 fnCells = fnActiveCells*2-1;
// Decision-tree splitting requires a single unified foam.
267 if (fSigBgSeparated && fDTLogic !=
"None") {
268 Log() << kFATAL <<
"Decision tree logic works only for a single foam (SigBgSeparate=F)" << Endl;
// Map the DTLogic option string to the EDTSeparation enum.
272 if (fDTLogic ==
"None")
273 fDTSeparation = kFoam;
274 else if (fDTLogic ==
"GiniIndex")
275 fDTSeparation = kGiniIndex;
276 else if (fDTLogic ==
"MisClassificationError")
277 fDTSeparation = kMisClassificationError;
278 else if (fDTLogic ==
"CrossEntropy")
279 fDTSeparation = kCrossEntropy;
280 else if (fDTLogic ==
"GiniIndexWithLaplace")
281 fDTSeparation = kGiniIndexWithLaplace;
282 else if (fDTLogic ==
"SdivSqrtSplusB")
283 fDTSeparation = kSdivSqrtSplusB;
// Unknown string: warn and fall back to plain foam splitting.
285 Log() << kWARNING <<
"Unknown separation type: " << fDTLogic
286 <<
", setting to None" << Endl;
288 fDTSeparation = kFoam;
// Map the kernel option string to the EKernel enum.
291 if (fKernelStr ==
"None" ) fKernel = kNone;
292 else if (fKernelStr ==
"Gauss" ) fKernel = kGaus;
293 else if (fKernelStr ==
"LinNeighbors") fKernel = kLinN;
295 if (fTargetSelectionStr ==
"Mean" ) fTargetSelection = kMean;
296 else fTargetSelection = kMpv;
// Multiple targets only make sense with MultiTargetRegression=T; force it on.
299 if (DoRegression() && Data()->GetNTargets() > 1 && !fMultiTargetRegression) {
300 Log() << kWARNING <<
"Warning: number of targets > 1"
301 <<
" and MultiTargetRegression=F was set, this makes no sense!"
302 <<
" --> I'm setting MultiTargetRegression=T" << Endl;
303 fMultiTargetRegression = kTRUE;
// Destructor: releases the kernel estimator owned by this object.
// (The foams themselves are presumably freed elsewhere, e.g. DeleteFoams —
// not visible in this extraction.)
310 TMVA::MethodPDEFoam::~MethodPDEFoam(
void )
314 if (fKernelEstimator != NULL)
315 delete fKernelEstimator;
// Determine the foam boundaries fXmin/fXmax per dimension: first scan all
// training events for the raw min/max, then fill fine-binned histograms and
// shrink the range so that the TailCut fraction of outlier events falls
// outside on each side.
// NOTE(review): extraction gaps — initialization of xmin/xmax, some braces,
// and the cleanup of the heap arrays/histograms are not visible here.
322 void TMVA::MethodPDEFoam::CalcXminXmax()
// Foam dimension: input variables only, or variables + targets for
// multi-target regression.
326 UInt_t kDim = GetNvar();
327 UInt_t tDim = Data()->GetNTargets();
328 UInt_t vDim = Data()->GetNVariables();
329 if (fMultiTargetRegression)
332 Float_t *xmin =
new Float_t[kDim];
333 Float_t *xmax =
new Float_t[kDim];
336 for (UInt_t dim=0; dim<kDim; dim++) {
341 Log() << kDEBUG <<
"Number of training events: " << Data()->GetNTrainingEvents() << Endl;
// Number of events allowed outside the range on each side (TailCut).
342 Int_t nevoutside = (Int_t)((Data()->GetNTrainingEvents())*(fFrac));
343 Int_t rangehistbins = 10000;
// First pass: raw per-dimension min/max over all training events.
347 for (Long64_t i=0; i<(GetNEvents()); i++) {
348 const Event* ev = GetEvent(i);
349 for (UInt_t dim=0; dim<kDim; dim++) {
351 if (fMultiTargetRegression) {
353 val = ev->GetValue(dim);
// Dimensions beyond the input variables hold the regression targets.
355 val = ev->GetTarget(dim-vDim);
358 val = ev->GetValue(dim);
// Second pass: histogram each dimension to locate the TailCut quantiles.
370 TH1F **range_h =
new TH1F*[kDim];
371 for (UInt_t dim=0; dim<kDim; dim++) {
372 range_h[dim] =
new TH1F(Form(
"range%i", dim),
"range", rangehistbins, xmin[dim], xmax[dim]);
376 for (Long64_t i=0; i<GetNEvents(); i++) {
377 const Event* ev = GetEvent(i);
378 for (UInt_t dim=0; dim<kDim; dim++) {
379 if (fMultiTargetRegression) {
381 range_h[dim]->Fill(ev->GetValue(dim));
383 range_h[dim]->Fill(ev->GetTarget(dim-vDim));
386 range_h[dim]->Fill(ev->GetValue(dim));
// Walk inward from both edges until more than nevoutside events are cut.
391 for (UInt_t dim=0; dim<kDim; dim++) {
392 for (Int_t i=1; i<(rangehistbins+1); i++) {
393 if (range_h[dim]->Integral(0, i) > nevoutside) {
394 xmin[dim]=range_h[dim]->GetBinLowEdge(i);
398 for (Int_t i=rangehistbins; i>0; i--) {
399 if (range_h[dim]->Integral(i, (rangehistbins+1)) > nevoutside) {
400 xmax[dim]=range_h[dim]->GetBinLowEdge(i+1);
// Store the trimmed ranges in the member vectors used by SetXminXmax().
410 for (UInt_t dim=0; dim<kDim; dim++) {
411 fXmin.push_back(xmin[dim]);
412 fXmax.push_back(xmax[dim]);
420 for (UInt_t dim=0; dim<kDim; dim++)
// Main training entry point: compute the foam boundaries, then dispatch to the
// appropriate specialized training routine (mono/multi-target regression,
// multiclass, separated or unified classification); finally drop the binary
// search trees, which are only needed during foam build-up.
430 void TMVA::MethodPDEFoam::Train(
void )
432 Log() << kVERBOSE <<
"Calculate Xmin and Xmax for every dimension" << Endl;
439 if (DoRegression()) {
440 if (fMultiTargetRegression)
441 TrainMultiTargetRegression();
443 TrainMonoTargetRegression();
447 TrainMultiClassification();
// Discriminant values only correspond to signal probabilities when the
// dataset normalization equalizes signal and background event counts.
449 if (DataInfo().GetNormalization() !=
"EQUALNUMEVENTS" ) {
450 Log() << kHEADER <<
"NormMode=" << DataInfo().GetNormalization()
451 <<
" chosen. Note that only NormMode=EqualNumEvents"
452 <<
" ensures that Discriminant values correspond to"
453 <<
" signal probabilities." << Endl;
456 Log() << kDEBUG <<
"N_sig for training events: " << Data()->GetNEvtSigTrain() << Endl;
457 Log() << kDEBUG <<
"N_bg for training events: " << Data()->GetNEvtBkgdTrain() << Endl;
458 Log() << kDEBUG <<
"User normalization: " << DataInfo().GetNormalization().Data() << Endl;
461 TrainSeparatedClassification();
463 TrainUnifiedClassification();
// Free the per-foam binary search trees; they are not needed after training.
468 for(UInt_t i=0; i<fFoam.size(); i++) {
470 fFoam.at(i)->DeleteBinarySearchTree();
481 void TMVA::MethodPDEFoam::TrainSeparatedClassification()
483 TString foamcaption[2];
484 foamcaption[0] =
"SignalFoam";
485 foamcaption[1] =
"BgFoam";
487 for(
int i=0; i<2; i++) {
489 fFoam.push_back( InitFoam(foamcaption[i], kSeparate) );
491 Log() << kVERBOSE <<
"Filling binary search tree of " << foamcaption[i]
492 <<
" with events" << Endl;
494 for (Long64_t k=0; k<GetNEvents(); ++k) {
495 const Event* ev = GetEvent(k);
496 if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
497 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
498 fFoam.back()->FillBinarySearchTree(ev);
501 Log() << kINFO <<
"Build up " << foamcaption[i] << Endl;
502 fFoam.back()->Create();
504 Log() << kVERBOSE <<
"Filling foam cells with events" << Endl;
506 for (Long64_t k=0; k<GetNEvents(); ++k) {
507 const Event* ev = GetEvent(k);
508 Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
509 if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
510 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
511 fFoam.back()->FillFoamCells(ev, weight);
// Train a single discriminator foam containing both signal and background
// events (SigBgSeparate=F mode): fill the search tree, build the cells, fill
// them with events, then let Finalize() compute the per-cell discriminator.
520 void TMVA::MethodPDEFoam::TrainUnifiedClassification()
522 fFoam.push_back( InitFoam(
"DiscrFoam", kDiscr, fSignalClass) );
524 Log() << kVERBOSE <<
"Filling binary search tree of discriminator foam with events" << Endl;
526 for (Long64_t k=0; k<GetNEvents(); ++k) {
527 const Event* ev = GetEvent(k);
528 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
529 fFoam.back()->FillBinarySearchTree(ev);
532 Log() << kINFO <<
"Build up discriminator foam" << Endl;
533 fFoam.back()->Create();
535 Log() << kVERBOSE <<
"Filling foam cells with events" << Endl;
537 for (Long64_t k=0; k<GetNEvents(); ++k) {
538 const Event* ev = GetEvent(k);
// Optionally use the original (pre-boost) event weight.
539 Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
540 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
541 fFoam.back()->FillFoamCells(ev, weight);
544 Log() << kVERBOSE <<
"Calculate cell discriminator"<< Endl;
546 fFoam.back()->Finalize();
// Train one discriminant foam per class for multiclass classification; each
// foam is built from all events and finalized to hold the discriminator for
// its class versus the rest.
556 void TMVA::MethodPDEFoam::TrainMultiClassification()
558 for (UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
560 fFoam.push_back( InitFoam(Form(
"MultiClassFoam%u",iClass), kMultiClass, iClass) );
562 Log() << kVERBOSE <<
"Filling binary search tree of multiclass foam "
563 << iClass <<
" with events" << Endl;
565 for (Long64_t k=0; k<GetNEvents(); ++k) {
566 const Event* ev = GetEvent(k);
567 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
568 fFoam.back()->FillBinarySearchTree(ev);
571 Log() << kINFO <<
"Build up multiclass foam " << iClass << Endl;
572 fFoam.back()->Create();
574 Log() << kVERBOSE <<
"Filling foam cells with events" << Endl;
577 for (Long64_t k=0; k<GetNEvents(); ++k) {
578 const Event* ev = GetEvent(k);
// Optionally use the original (pre-boost) event weight.
579 Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
580 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
581 fFoam.back()->FillFoamCells(ev, weight);
584 Log() << kVERBOSE <<
"Calculate cell discriminator"<< Endl;
586 fFoam.back()->Finalize();
// Train a single foam for regression with exactly one target: the foam cells
// store the average target value of the events they contain (computed by
// Finalize()). Aborts if the dataset has more than one target.
595 void TMVA::MethodPDEFoam::TrainMonoTargetRegression()
597 if (Data()->GetNTargets() != 1) {
598 Log() << kFATAL <<
"Can't do mono-target regression with "
599 << Data()->GetNTargets() <<
" targets!" << Endl;
602 Log() << kDEBUG <<
"MethodPDEFoam: number of Targets: " << Data()->GetNTargets() << Endl;
604 fFoam.push_back( InitFoam(
"MonoTargetRegressionFoam", kMonoTarget) );
606 Log() << kVERBOSE <<
"Filling binary search tree with events" << Endl;
608 for (Long64_t k=0; k<GetNEvents(); ++k) {
609 const Event* ev = GetEvent(k);
610 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
611 fFoam.back()->FillBinarySearchTree(ev);
614 Log() << kINFO <<
"Build mono target regression foam" << Endl;
615 fFoam.back()->Create();
617 Log() << kVERBOSE <<
"Filling foam cells with events" << Endl;
619 for (Long64_t k=0; k<GetNEvents(); ++k) {
620 const Event* ev = GetEvent(k);
// Optionally use the original (pre-boost) event weight.
621 Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
622 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
623 fFoam.back()->FillFoamCells(ev, weight);
626 Log() << kVERBOSE <<
"Calculate average cell targets"<< Endl;
628 fFoam.back()->Finalize();
// Train a foam for multi-target regression. The foam dimension is
// nVariables + nTargets: each event is copied and its targets are appended to
// its value vector before being inserted, so targets become extra foam axes.
// NOTE(review): extraction gaps — the kernel check condition before the
// kFATAL (LinNeighbors) and the deletes of the copied events are not visible
// in this view.
636 void TMVA::MethodPDEFoam::TrainMultiTargetRegression()
638 Log() << kDEBUG <<
"Number of variables: " << Data()->GetNVariables() << Endl;
639 Log() << kDEBUG <<
"Number of Targets: " << Data()->GetNTargets() << Endl;
640 Log() << kDEBUG <<
"Dimension of foam: " << Data()->GetNVariables()+Data()->GetNTargets() << Endl;
642 Log() << kFATAL <<
"LinNeighbors kernel currently not supported"
643 <<
" for multi target regression" << Endl;
645 fFoam.push_back( InitFoam(
"MultiTargetRegressionFoam", kMultiTarget) );
647 Log() << kVERBOSE <<
"Filling binary search tree of multi target regression foam with events"
650 for (Long64_t k=0; k<GetNEvents(); ++k) {
// Copy the event so its value vector can be extended with the targets.
651 Event *ev =
new Event(*GetEvent(k));
654 std::vector<Float_t> targets(ev->GetTargets());
655 const UInt_t nVariables = ev->GetValues().size();
656 for (UInt_t i = 0; i < targets.size(); ++i)
657 ev->SetVal(i+nVariables, targets.at(i));
658 ev->GetTargets().clear();
659 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
660 fFoam.back()->FillBinarySearchTree(ev);
666 Log() << kINFO <<
"Build multi target regression foam" << Endl;
667 fFoam.back()->Create();
669 Log() << kVERBOSE <<
"Filling foam cells with events" << Endl;
671 for (Long64_t k=0; k<GetNEvents(); ++k) {
// Same target-appending transformation for the cell-filling pass.
672 Event *ev =
new Event(*GetEvent(k));
675 std::vector<Float_t> targets = ev->GetTargets();
676 const UInt_t nVariables = ev->GetValues().size();
677 Float_t weight = fFillFoamWithOrigWeights ? ev->GetOriginalWeight() : ev->GetWeight();
678 for (UInt_t i = 0; i < targets.size(); ++i)
679 ev->SetVal(i+nVariables, targets.at(i));
680 ev->GetTargets().clear();
681 if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
682 fFoam.back()->FillFoamCells(ev, weight);
// Classification response for the current event. In separated-foam mode the
// discriminant is the signal density divided by the total density from the two
// foams; in unified mode it is read directly from the discriminator foam.
// Optionally fills the MVA error. With UseYesNoCell the return is collapsed
// to -1 (background-like) / +1 (signal-like).
// NOTE(review): extraction gaps — the non-UseYesNoCell return path and some
// braces are not visible here.
709 Double_t TMVA::MethodPDEFoam::GetMvaValue( Double_t* err, Double_t* errUpper )
711 const Event* ev = GetEvent();
714 if (fSigBgSeparated) {
715 std::vector<Float_t> xvec = ev->GetValues();
717 Double_t density_sig = 0.;
718 Double_t density_bg = 0.;
719 density_sig = fFoam.at(0)->GetCellValue(xvec, kValueDensity, fKernelEstimator);
720 density_bg = fFoam.at(1)->GetCellValue(xvec, kValueDensity, fKernelEstimator);
// Normalized discriminant in [0,1]; guard against zero total density.
723 if ( (density_sig+density_bg) > 0 )
724 discr = density_sig/(density_sig+density_bg);
730 discr = fFoam.at(0)->GetCellValue(ev->GetValues(), kValue, fKernelEstimator);
// Attach the statistical error on the discriminant if requested.
734 if (err || errUpper) {
735 const Double_t discr_error = CalculateMVAError();
736 if (err != 0) *err = discr_error;
737 if (errUpper != 0) *errUpper = discr_error;
741 return (discr < 0.5 ? -1 : 1);
// Statistical error of the MVA discriminant for the current event. In
// separated-foam mode it is obtained by Gaussian error propagation of
// d = nS/(nS+nB) with Poisson errors sqrt(n) (error set to 1 for empty
// cells); in unified mode the error is read directly from the foam.
755 Double_t TMVA::MethodPDEFoam::CalculateMVAError()
757 const Event* ev = GetEvent();
758 Double_t mvaError = 0.0;
760 if (fSigBgSeparated) {
761 const std::vector<Float_t>& xvec = ev->GetValues();
763 const Double_t neventsB = fFoam.at(1)->GetCellValue(xvec, kValue, fKernelEstimator);
764 const Double_t neventsS = fFoam.at(0)->GetCellValue(xvec, kValue, fKernelEstimator);
// Relative scale between background and signal; fixed to 1 here.
765 const Double_t scaleB = 1.;
// Poisson errors; use 1.0 as a floor when a foam reports zero events.
767 const Double_t errorS = neventsS == 0 ? 1.0 : TMath::Sqrt(neventsS);
768 const Double_t errorB = neventsB == 0 ? 1.0 : TMath::Sqrt(neventsB);
770 if ((neventsS > 1e-10) || (neventsB > 1e-10)) {
// Error propagation on d = nS / (nS + scaleB*nB).
772 mvaError = TMath::Sqrt(Sqr(scaleB * neventsB / Sqr(neventsS + scaleB * neventsB) * errorS) +
773 Sqr(scaleB * neventsS / Sqr(neventsS + scaleB * neventsB) * errorB));
779 mvaError = fFoam.at(0)->GetCellValue(ev->GetValues(), kValueError, fKernelEstimator);
// Multiclass response: query each per-class foam for its cell value, then
// softmax-normalize across classes so the outputs sum to one.
// NOTE(review): extraction gaps — the original presumably skips j == iClass
// in the inner sum (line numbering jumps 808 -> 810); confirm against the
// full source.
789 const std::vector<Float_t>& TMVA::MethodPDEFoam::GetMulticlassValues()
791 const TMVA::Event *ev = GetEvent();
792 std::vector<Float_t> xvec = ev->GetValues();
794 if (fMulticlassReturnVal == NULL)
795 fMulticlassReturnVal =
new std::vector<Float_t>();
796 fMulticlassReturnVal->clear();
797 fMulticlassReturnVal->reserve(DataInfo().GetNClasses());
// Raw per-class foam responses.
799 std::vector<Float_t> temp;
800 UInt_t nClasses = DataInfo().GetNClasses();
801 temp.reserve(nClasses);
802 for (UInt_t iClass = 0; iClass < nClasses; ++iClass) {
803 temp.push_back(fFoam.at(iClass)->GetCellValue(xvec, kValue, fKernelEstimator));
// Softmax normalization: P(iClass) = 1 / (1 + sum_j exp(t_j - t_iClass)).
806 for (UInt_t iClass = 0; iClass < nClasses; ++iClass) {
808 for (UInt_t j = 0; j < nClasses; ++j) {
810 norm += exp(temp[j] - temp[iClass]);
812 fMulticlassReturnVal->push_back(1.0 / (1.0 + norm));
815 return *fMulticlassReturnVal;
// Variable-importance ranking: for each foam, count how often each input
// variable was used as a cell-split axis (GetNCuts on the cell tree),
// normalize by the total number of cuts, and average over all foams.
823 const TMVA::Ranking* TMVA::MethodPDEFoam::CreateRanking()
826 fRanking =
new Ranking(GetName(),
"Variable Importance");
827 std::vector<Float_t> importance(GetNvar(), 0);
830 for (UInt_t ifoam = 0; ifoam < fFoam.size(); ++ifoam) {
// Count cuts per dimension by walking the foam's cell tree.
832 PDEFoamCell *root_cell = fFoam.at(ifoam)->GetRootCell();
833 std::vector<UInt_t> nCuts(fFoam.at(ifoam)->GetTotDim(), 0);
834 GetNCuts(root_cell, nCuts);
838 UInt_t sumOfCuts = 0;
839 std::vector<Float_t> tmp_importance;
840 for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
841 sumOfCuts += nCuts.at(ivar);
842 tmp_importance.push_back( nCuts.at(ivar) );
// Normalize per-foam importance; zero it if the foam made no cuts.
846 for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
848 tmp_importance.at(ivar) /= sumOfCuts;
850 tmp_importance.at(ivar) = 0;
// Average the per-foam importances over all foams.
853 for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
854 importance.at(ivar) += tmp_importance.at(ivar) / fFoam.size();
859 for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
860 fRanking->AddRank(Rank(GetInputLabel(ivar), importance.at(ivar)));
// Recursively walk the cell tree and increment nCuts[dim] for the split
// dimension (GetBest) of every non-leaf cell; leaves (GetStat()==1) and null
// cells terminate the recursion.
876 void TMVA::MethodPDEFoam::GetNCuts(PDEFoamCell *cell, std::vector<UInt_t> &nCuts)
878 if (cell == NULL || cell->GetStat() == 1)
881 nCuts.at(cell->GetBest())++;
883 if (cell->GetDau0() != NULL)
884 GetNCuts(cell->GetDau0(), nCuts);
885 if (cell->GetDau1() != NULL)
886 GetNCuts(cell->GetDau1(), nCuts);
// Transfer the stored per-dimension ranges fXmin/fXmax onto the given foam.
// For multi-target regression the targets count as extra dimensions. Fatal
// error on a null foam pointer.
892 void TMVA::MethodPDEFoam::SetXminXmax( TMVA::PDEFoam *pdefoam )
895 Log() << kFATAL <<
"Null pointer given!" << Endl;
899 UInt_t num_vars = GetNvar();
900 if (fMultiTargetRegression)
901 num_vars += Data()->GetNTargets();
903 for (UInt_t idim=0; idim<num_vars; idim++) {
904 Log()<< kDEBUG <<
"foam: SetXmin[dim="<<idim<<
"]: " << fXmin.at(idim) << Endl;
905 Log()<< kDEBUG <<
"foam: SetXmax[dim="<<idim<<
"]: " << fXmax.at(idim) << Endl;
906 pdefoam->SetXmin(idim, fXmin.at(idim));
907 pdefoam->SetXmax(idim, fXmax.at(idim));
// Factory for a configured PDEFoam of the requested type (kSeparate,
// kMultiTarget, kDiscr/kMultiClass, kMonoTarget): creates the matching foam
// and density estimator, attaches the density, creates the kernel estimator,
// copies all build parameters onto the foam, initializes it, and sets the
// variable ranges.
// NOTE(review): extraction gaps — the switch/if structure selecting the foam
// type, several case labels/breaks, and the final return are not visible in
// this view.
934 TMVA::PDEFoam* TMVA::MethodPDEFoam::InitFoam(TString foamcaption, EFoamType ft, UInt_t cls)
// Multi-target regression foams span variables + targets.
938 if (ft == kMultiTarget)
940 dim = Data()->GetNTargets() + Data()->GetNVariables();
// Per-dimension sampling-box size: VolFrac of the dimension's range.
945 std::vector<Double_t> box;
946 for (Int_t idim = 0; idim < dim; ++idim) {
947 box.push_back((fXmax.at(idim) - fXmin.at(idim))* fVolFrac);
951 PDEFoam *pdefoam = NULL;
952 PDEFoamDensityBase *density = NULL;
// Plain foam-variance splitting: pick the foam/density pair by foam type.
953 if (fDTSeparation == kFoam) {
957 pdefoam =
new PDEFoamEvent(foamcaption);
958 density =
new PDEFoamEventDensity(box);
961 pdefoam =
new PDEFoamMultiTarget(foamcaption, fTargetSelection);
962 density =
new PDEFoamEventDensity(box);
966 pdefoam =
new PDEFoamDiscriminant(foamcaption, cls);
967 density =
new PDEFoamDiscriminantDensity(box, cls);
970 pdefoam =
new PDEFoamTarget(foamcaption, 0);
971 density =
new PDEFoamTargetDensity(box, 0);
974 Log() << kFATAL <<
"Unknown PDEFoam type!" << Endl;
// Decision-tree-style splitting: choose the separation criterion, then use a
// PDEFoamDecisionTree (classification with a single foam only).
982 SeparationBase *sepType = NULL;
983 switch (fDTSeparation) {
985 sepType =
new GiniIndex();
987 case kMisClassificationError:
988 sepType =
new MisClassificationError();
991 sepType =
new CrossEntropy();
993 case kGiniIndexWithLaplace:
994 sepType =
new GiniIndexWithLaplace();
996 case kSdivSqrtSplusB:
997 sepType =
new SdivSqrtSplusB();
1000 Log() << kFATAL <<
"Separation type " << fDTSeparation
1001 <<
" currently not supported" << Endl;
1007 pdefoam =
new PDEFoamDecisionTree(foamcaption, sepType, cls);
1008 density =
new PDEFoamDecisionTreeDensity(box, cls);
1011 Log() << kFATAL <<
"Decision tree cell split algorithm is only"
1012 <<
" available for (multi) classification with a single"
1013 <<
" PDE-Foam (SigBgSeparate=F)" << Endl;
1018 if (pdefoam) pdefoam->SetDensity(density);
1019 else Log() << kFATAL <<
"PDEFoam pointer not set, exiting.." << Endl;
// Create the kernel estimator matching the Kernel option.
1022 fKernelEstimator = CreatePDEFoamKernel();
// Propagate this method's logging verbosity to the foam.
1025 pdefoam->Log().SetMinType(this->Log().GetMinType());
// Copy all foam build parameters.
1028 pdefoam->SetDim( dim);
1029 pdefoam->SetnCells( fnCells);
1030 pdefoam->SetnSampl( fnSampl);
1031 pdefoam->SetnBin( fnBin);
1032 pdefoam->SetEvPerBin( fEvPerBin);
1035 pdefoam->SetNmin(fNmin);
1036 pdefoam->SetMaxDepth(fMaxDepth);
1039 pdefoam->Initialize();
1042 SetXminXmax(pdefoam);
// Regression response for the current event. Multi-target mode queries the
// single multi-target foam with a sparse (index -> value) map and checks the
// target count; mono-target mode reads one value from the foam. The raw
// targets are then passed through the inverse variable transformation before
// being returned.
1050 const std::vector<Float_t>& TMVA::MethodPDEFoam::GetRegressionValues()
1052 if (fRegressionReturnVal == 0) fRegressionReturnVal =
new std::vector<Float_t>();
1053 fRegressionReturnVal->clear();
1054 fRegressionReturnVal->reserve(Data()->GetNTargets());
1056 const Event* ev = GetEvent();
1057 std::vector<Float_t> vals = ev->GetValues();
1060 Log() << kWARNING <<
"<GetRegressionValues> value vector is empty. " << Endl;
1063 if (fMultiTargetRegression) {
// Build a dimension-index -> value map for the multi-target foam query.
1065 std::map<Int_t, Float_t> xvec;
1066 for (UInt_t i=0; i<vals.size(); ++i)
1067 xvec.insert(std::pair<Int_t, Float_t>(i, vals.at(i)));
1069 std::vector<Float_t> targets = fFoam.at(0)->GetCellValue( xvec, kValue );
1072 if (targets.size() != Data()->GetNTargets())
1073 Log() << kFATAL <<
"Something wrong with multi-target regression foam: "
1074 <<
"number of targets does not match the DataSet()" << Endl;
1075 for(UInt_t i=0; i<targets.size(); i++)
1076 fRegressionReturnVal->push_back(targets.at(i));
1079 fRegressionReturnVal->push_back(fFoam.at(0)->GetCellValue(vals, kValue, fKernelEstimator));
// Apply the inverse transformation so targets are returned in user units.
1083 Event * evT =
new Event(*ev);
1084 for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
1085 evT->SetTarget(itgt, fRegressionReturnVal->at(itgt) );
1087 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
1088 fRegressionReturnVal->clear();
1089 for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
1090 fRegressionReturnVal->push_back( evT2->GetTarget(itgt) );
1095 return (*fRegressionReturnVal);
// Create the kernel estimator matching the fKernel option: trivial (none),
// linear-neighbor, or Gaussian with width fVolFrac/2; unknown values are a
// fatal error. Caller takes ownership of the returned object.
// NOTE(review): the switch/case labels selecting among these returns are not
// visible in this extraction.
1102 TMVA::PDEFoamKernelBase* TMVA::MethodPDEFoam::CreatePDEFoamKernel()
1106 return new PDEFoamKernelTrivial();
1108 return new PDEFoamKernelLinN();
1110 return new PDEFoamKernelGauss(fVolFrac/2.0);
1112 Log() << kFATAL <<
"Kernel: " << fKernel <<
" not supported!" << Endl;
// Delete all foams owned by this method. (The visible code frees the
// elements; clearing fFoam itself is presumably done on a line not visible in
// this extraction.)
1121 void TMVA::MethodPDEFoam::DeleteFoams()
1123 for (UInt_t i=0; i<fFoam.size(); i++)
1124 if (fFoam.at(i))
delete fFoam.at(i);
// Reset the method to an untrained state: releases the kernel estimator
// (and, per the base-class contract, presumably the foams on lines not
// visible in this extraction).
1134 void TMVA::MethodPDEFoam::Reset()
1138 if (fKernelEstimator != NULL) {
1139 delete fKernelEstimator;
1140 fKernelEstimator = NULL;
// Intentionally empty: PDEFoam has no coefficients to print.
1146 void TMVA::MethodPDEFoam::PrintCoefficients(
void )
// Serialize the method configuration to the XML weight file: one "Weights"
// node with all scalar options as attributes, plus one "Xmin"/"Xmax" child
// per dimension. The foams themselves are stored separately in a ROOT file
// (see WriteFoamsToFile).
1152 void TMVA::MethodPDEFoam::AddWeightsXMLTo(
void* parent )
const
1154 void* wght = gTools().AddChild(parent,
"Weights");
1155 gTools().AddAttr( wght,
"SigBgSeparated", fSigBgSeparated );
1156 gTools().AddAttr( wght,
"Frac", fFrac );
1157 gTools().AddAttr( wght,
"DiscrErrCut", fDiscrErrCut );
1158 gTools().AddAttr( wght,
"VolFrac", fVolFrac );
1159 gTools().AddAttr( wght,
"nCells", fnCells );
1160 gTools().AddAttr( wght,
"nSampl", fnSampl );
1161 gTools().AddAttr( wght,
"nBin", fnBin );
1162 gTools().AddAttr( wght,
"EvPerBin", fEvPerBin );
1163 gTools().AddAttr( wght,
"Compress", fCompress );
1164 gTools().AddAttr( wght,
"DoRegression", DoRegression() );
1165 gTools().AddAttr( wght,
"CutNmin", fNmin>0 );
1166 gTools().AddAttr( wght,
"Nmin", fNmin );
// CutRMSmin/RMSmin are obsolete; written as fixed values for backward
// compatibility with old readers.
1167 gTools().AddAttr( wght,
"CutRMSmin",
false );
1168 gTools().AddAttr( wght,
"RMSmin", 0.0 );
1169 gTools().AddAttr( wght,
"Kernel", KernelToUInt(fKernel) );
1170 gTools().AddAttr( wght,
"TargetSelection", TargetSelectionToUInt(fTargetSelection) );
1171 gTools().AddAttr( wght,
"FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
1172 gTools().AddAttr( wght,
"UseYesNoCell", fUseYesNoCell );
// Per-dimension foam boundaries, one child node each.
1176 for (UInt_t i=0; i<fXmin.size(); i++){
1177 xmin_wrap = gTools().AddChild( wght,
"Xmin" );
1178 gTools().AddAttr( xmin_wrap,
"Index", i );
1179 gTools().AddAttr( xmin_wrap,
"Value", fXmin.at(i) );
1182 for (UInt_t i=0; i<fXmax.size(); i++){
1183 xmax_wrap = gTools().AddChild( wght,
"Xmax" );
1184 gTools().AddAttr( xmax_wrap,
"Index", i );
1185 gTools().AddAttr( xmax_wrap,
"Value", fXmax.at(i) );
// Write all trained foams to a ROOT file whose name is derived from the
// weight file name ("<weights>.xml" -> "<weights>_foams.root"), optionally
// with maximum compression. Variable names are attached to the foams first so
// plots are labeled.
1195 void TMVA::MethodPDEFoam::WriteFoamsToFile()
const
1198 FillVariableNamesToFoam();
1200 TString rfname( GetWeightFileName() );
// Normalize a possible ".weights.txt" name to ".xml", then swap the
// extension for the foam file suffix.
1203 rfname.ReplaceAll( TString(
".") + gConfig().GetIONames().fWeightFileExtension +
".txt",
".xml" );
1206 rfname.ReplaceAll(
".xml",
"_foams.root" );
1208 TFile *rootFile = 0;
// Compression level 9 when the Compress option is set.
1209 if (fCompress) rootFile =
new TFile(rfname,
"RECREATE",
"foamfile", 9);
1210 else rootFile =
new TFile(rfname,
"RECREATE");
1213 for (UInt_t i=0; i<fFoam.size(); ++i) {
1214 Log() <<
"writing foam " << fFoam.at(i)->GetFoamName().Data()
1215 <<
" to file" << Endl;
1216 fFoam.at(i)->Write(fFoam.at(i)->GetFoamName().Data());
1220 Log() << kINFO <<
"Foams written to file: "
1221 << gTools().Color(
"lightblue") << rfname << gTools().Color(
"reset") << Endl;
// Legacy reader: restore the method configuration from a plain-text weight
// stream (old .txt weight files), then load the foams from the companion ROOT
// file.
// NOTE(review): extraction gaps — several istr >> reads between the visible
// ones (e.g. the values feeding 'regr', 'ker', 'ts') are not shown here.
1227 void TMVA::MethodPDEFoam::ReadWeightsFromStream( std::istream& istr )
1229 istr >> fSigBgSeparated;
1231 istr >> fDiscrErrCut;
1241 SetAnalysisType( (regr ? Types::kRegression : Types::kClassification ) );
// Obsolete flags kept only so old streams still parse.
1243 Bool_t CutNmin, CutRMSmin;
1252 fKernel = UIntToKernel(ker);
1256 fTargetSelection = UIntToTargetSelection(ts);
1258 istr >> fFillFoamWithOrigWeights;
1259 istr >> fUseYesNoCell;
// Restore per-dimension foam boundaries (targets add dimensions in
// multi-target regression).
1264 UInt_t kDim = GetNvar();
1265 if (fMultiTargetRegression)
1266 kDim += Data()->GetNTargets();
1267 fXmin.assign(kDim, 0);
1268 fXmax.assign(kDim, 0);
1271 for (UInt_t i=0; i<kDim; i++)
1272 istr >> fXmin.at(i);
1273 for (UInt_t i=0; i<kDim; i++)
1274 istr >> fXmax.at(i);
1277 ReadFoamsFromFile();
// Restore the method configuration from the XML weight node written by
// AddWeightsXMLTo: read all scalar attributes, the per-dimension Xmin/Xmax
// children, then load the foams from the companion ROOT file and recreate the
// kernel estimator.
1283 void TMVA::MethodPDEFoam::ReadWeightsFromXML(
void* wghtnode )
1285 gTools().ReadAttr( wghtnode,
"SigBgSeparated", fSigBgSeparated );
1286 gTools().ReadAttr( wghtnode,
"Frac", fFrac );
1287 gTools().ReadAttr( wghtnode,
"DiscrErrCut", fDiscrErrCut );
1288 gTools().ReadAttr( wghtnode,
"VolFrac", fVolFrac );
1289 gTools().ReadAttr( wghtnode,
"nCells", fnCells );
1290 gTools().ReadAttr( wghtnode,
"nSampl", fnSampl );
1291 gTools().ReadAttr( wghtnode,
"nBin", fnBin );
1292 gTools().ReadAttr( wghtnode,
"EvPerBin", fEvPerBin );
1293 gTools().ReadAttr( wghtnode,
"Compress", fCompress );
1295 gTools().ReadAttr( wghtnode,
"DoRegression", regr );
// CutNmin/CutRMSmin/RMSmin are obsolete but still present in old files.
1297 gTools().ReadAttr( wghtnode,
"CutNmin", CutNmin );
1298 gTools().ReadAttr( wghtnode,
"Nmin", fNmin );
1301 gTools().ReadAttr( wghtnode,
"CutRMSmin", CutRMSmin );
1302 gTools().ReadAttr( wghtnode,
"RMSmin", RMSmin );
1304 gTools().ReadAttr( wghtnode,
"Kernel", ker );
1305 fKernel = UIntToKernel(ker);
1307 gTools().ReadAttr( wghtnode,
"TargetSelection", ts );
1308 fTargetSelection = UIntToTargetSelection(ts);
// These attributes were added later; guard for older weight files.
1309 if (gTools().HasAttr(wghtnode,
"FillFoamWithOrigWeights"))
1310 gTools().ReadAttr( wghtnode,
"FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
1311 if (gTools().HasAttr(wghtnode,
"UseYesNoCell"))
1312 gTools().ReadAttr( wghtnode,
"UseYesNoCell", fUseYesNoCell );
// Restore the per-dimension foam boundaries.
1317 UInt_t kDim = GetNvar();
1318 if (fMultiTargetRegression)
1319 kDim += Data()->GetNTargets();
1320 fXmin.assign(kDim, 0);
1321 fXmax.assign(kDim, 0);
// Xmin children come first, indexed via their "Index" attribute.
1324 void *xmin_wrap = gTools().GetChild( wghtnode );
1325 for (UInt_t counter=0; counter<kDim; counter++) {
1327 gTools().ReadAttr( xmin_wrap ,
"Index", i );
1329 Log() << kFATAL <<
"dimension index out of range:" << i << Endl;
1330 gTools().ReadAttr( xmin_wrap ,
"Value", fXmin.at(i) );
1331 xmin_wrap = gTools().GetNextChild( xmin_wrap );
// Xmax children follow directly after the Xmin children.
1334 void *xmax_wrap = xmin_wrap;
1335 for (UInt_t counter=0; counter<kDim; counter++) {
1337 gTools().ReadAttr( xmax_wrap ,
"Index", i );
1339 Log() << kFATAL <<
"dimension index out of range:" << i << Endl;
1340 gTools().ReadAttr( xmax_wrap ,
"Value", fXmax.at(i) );
1341 xmax_wrap = gTools().GetNextChild( xmax_wrap );
1348 ReadFoamsFromFile();
// Rebuild the kernel estimator to match the restored fKernel option.
1351 if (fKernelEstimator != NULL)
1352 delete fKernelEstimator;
1353 fKernelEstimator = CreatePDEFoamKernel();
// Fetch the named foam from the given ROOT file and return a clone of it (so
// the foam survives closing the file). Warns and presumably returns NULL on a
// null file pointer or failed clone — the return statements are not visible
// in this extraction.
1374 TMVA::PDEFoam* TMVA::MethodPDEFoam::ReadClonedFoamFromFile(TFile* file,
const TString& foamname)
1377 Log() << kWARNING <<
"<ReadClonedFoamFromFile>: NULL pointer given" << Endl;
1382 PDEFoam *foam = (PDEFoam*) file->Get(foamname);
// Clone detaches the foam from the TFile's object ownership.
1387 foam = (PDEFoam*) foam->Clone();
1389 Log() << kWARNING <<
"<ReadClonedFoamFromFile>: " << foamname
1390 <<
" could not be cloned!" << Endl;
// Load the trained foams from the "_foams.root" companion of the weight file,
// choosing which foam names to read based on the analysis mode (regression
// mono/multi-target, separated signal/background, unified discriminator, or
// one foam per class for multiclass). Fatal error if the file cannot be
// opened or any foam failed to load.
1400 void TMVA::MethodPDEFoam::ReadFoamsFromFile()
1402 TString rfname( GetWeightFileName() );
// Same name derivation as WriteFoamsToFile: ".weights.txt" -> ".xml" ->
// "_foams.root".
1405 rfname.ReplaceAll( TString(
".") + gConfig().GetIONames().fWeightFileExtension +
".txt",
".xml" );
1408 rfname.ReplaceAll(
".xml",
"_foams.root" );
1410 Log() << kINFO <<
"Read foams from file: " << gTools().Color(
"lightblue")
1411 << rfname << gTools().Color(
"reset") << Endl;
1412 TFile *rootFile =
new TFile( rfname,
"READ" );
1413 if (rootFile->IsZombie()) Log() << kFATAL <<
"Cannot open file \"" << rfname <<
"\"" << Endl;
// Select the foam set matching the trained analysis type.
1416 if (DoRegression()) {
1417 if (fMultiTargetRegression)
1418 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"MultiTargetRegressionFoam"));
1420 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"MonoTargetRegressionFoam"));
1422 if (fSigBgSeparated) {
1423 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"SignalFoam"));
1424 fFoam.push_back(ReadClonedFoamFromFile(rootFile,
"BgFoam"));
1427 PDEFoam *foam = ReadClonedFoamFromFile(rootFile,
"DiscrFoam");
1429 fFoam.push_back(foam);
1432 for (UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
1433 fFoam.push_back(ReadClonedFoamFromFile(rootFile, Form(
"MultiClassFoam%u",iClass)));
// Verify every expected foam was actually loaded.
1444 for (UInt_t i=0; i<fFoam.size(); ++i) {
1446 Log() << kFATAL <<
"Could not load foam!" << Endl;
// Map the integer kernel code stored in weight files back to the EKernel
// enum (0=None, 1=Gauss, 2=LinNeighbors); warns on unknown values (the
// fallback return after the warning is not visible in this extraction).
1453 TMVA::MethodPDEFoam::EKernel TMVA::MethodPDEFoam::UIntToKernel(UInt_t iker)
1456 case 0:
return kNone;
1457 case 1:
return kGaus;
1458 case 2:
return kLinN;
1460 Log() << kWARNING <<
"<UIntToKernel>: unknown kernel number: " << iker << Endl;
// Map the integer target-selection code stored in weight files back to the
// ETargetSelection enum (0=Mean, 1=Mpv); warns on unknown values (the
// fallback return after the warning is not visible in this extraction).
1469 TMVA::ETargetSelection TMVA::MethodPDEFoam::UIntToTargetSelection(UInt_t its)
1472 case 0:
return kMean;
1473 case 1:
return kMpv;
1475 Log() << kWARNING <<
"<UIntToTargetSelection>: unknown method TargetSelection: " << its << Endl;
// Attach the dataset's variable (and, for multi-target regression, target)
// expression strings to every foam dimension, so stored foams carry
// human-readable axis labels.
1484 void TMVA::MethodPDEFoam::FillVariableNamesToFoam()
const
1486 for (UInt_t ifoam=0; ifoam<fFoam.size(); ifoam++) {
1487 for (Int_t idim=0; idim<fFoam.at(ifoam)->GetTotDim(); idim++) {
// Dimensions beyond the input variables correspond to regression targets.
1488 if(fMultiTargetRegression && (UInt_t)idim>=DataInfo().GetNVariables())
1489 fFoam.at(ifoam)->AddVariableName(DataInfo().GetTargetInfo(idim-DataInfo().GetNVariables()).GetExpression().Data());
1491 fFoam.at(ifoam)->AddVariableName(DataInfo().GetVariableInfo(idim).GetExpression().Data());
// Intentionally empty: PDEFoam does not support standalone C++ response-class
// code generation.
1500 void TMVA::MethodPDEFoam::MakeClassSpecific( std::ostream& ,
const TString& )
const
1507 void TMVA::MethodPDEFoam::GetHelpMessage()
const
1510 Log() << gTools().Color(
"bold") <<
"--- Short description:" << gTools().Color(
"reset") << Endl;
1512 Log() <<
"PDE-Foam is a variation of the PDE-RS method using a self-adapting" << Endl;
1513 Log() <<
"binning method to divide the multi-dimensional variable space into a" << Endl;
1514 Log() <<
"finite number of hyper-rectangles (cells). The binning algorithm " << Endl;
1515 Log() <<
"adjusts the size and position of a predefined number of cells such" << Endl;
1516 Log() <<
"that the variance of the signal and background densities inside the " << Endl;
1517 Log() <<
"cells reaches a minimum" << Endl;
1519 Log() << gTools().Color(
"bold") <<
"--- Use of booking options:" << gTools().Color(
"reset") << Endl;
1521 Log() <<
"The PDEFoam classifier supports two different algorithms: " << Endl;
1523 Log() <<
" (1) Create one foam, which stores the signal over background" << Endl;
1524 Log() <<
" probability density. During foam buildup the variance of the" << Endl;
1525 Log() <<
" discriminant inside the cells is minimised." << Endl;
1527 Log() <<
" Booking option: SigBgSeparated=F" << Endl;
1529 Log() <<
" (2) Create two separate foams, one for the signal events and one for" << Endl;
1530 Log() <<
" background events. During foam buildup the variance of the" << Endl;
1531 Log() <<
" event density inside the cells is minimised separately for" << Endl;
1532 Log() <<
" signal and background." << Endl;
1534 Log() <<
" Booking option: SigBgSeparated=T" << Endl;
1536 Log() <<
"The following options can be set (the listed values are found to be a" << Endl;
1537 Log() <<
"good starting point for most applications):" << Endl;
1539 Log() <<
" SigBgSeparate False Separate Signal and Background" << Endl;
1540 Log() <<
" TailCut 0.001 Fraction of outlier events that excluded" << Endl;
1541 Log() <<
" from the foam in each dimension " << Endl;
1542 Log() <<
" VolFrac 0.0666 Volume fraction (used for density calculation" << Endl;
1543 Log() <<
" during foam build-up) " << Endl;
1544 Log() <<
" nActiveCells 500 Maximal number of active cells in final foam " << Endl;
1545 Log() <<
" nSampl 2000 Number of MC events per cell in foam build-up " << Endl;
1546 Log() <<
" nBin 5 Number of bins used in foam build-up " << Endl;
1547 Log() <<
" Nmin 100 Number of events in cell required to split cell" << Endl;
1548 Log() <<
" Kernel None Kernel type used (possible values are: None," << Endl;
1549 Log() <<
" Gauss)" << Endl;
1550 Log() <<
" Compress True Compress foam output file " << Endl;
1552 Log() <<
" Additional regression options:" << Endl;
1554 Log() <<
"MultiTargetRegression False Do regression with multiple targets " << Endl;
1555 Log() <<
" TargetSelection Mean Target selection method (possible values are: " << Endl;
1556 Log() <<
" Mean, Mpv)" << Endl;
1558 Log() << gTools().Color(
"bold") <<
"--- Performance optimisation:" << gTools().Color(
"reset") << Endl;
1560 Log() <<
"The performance of the two implementations was found to be similar for" << Endl;
1561 Log() <<
"most examples studied. For the same number of cells per foam, the two-" << Endl;
1562 Log() <<
"foam option approximately doubles the amount of computer memory needed" << Endl;
1563 Log() <<
"during classification. For special cases where the event-density" << Endl;
1564 Log() <<
"distribution of signal and background events is very different, the" << Endl;
1565 Log() <<
"two-foam option was found to perform significantly better than the" << Endl;
1566 Log() <<
"option with only one foam." << Endl;
1568 Log() <<
"In order to gain better classification performance we recommend to set" << Endl;
1569 Log() <<
"the parameter \"nActiveCells\" to a high value." << Endl;
1571 Log() <<
"The parameter \"VolFrac\" specifies the size of the sampling volume" << Endl;
1572 Log() <<
"during foam buildup and should be tuned in order to achieve optimal" << Endl;
1573 Log() <<
"performance. A larger box leads to a reduced statistical uncertainty" << Endl;
1574 Log() <<
"for small training samples and to smoother sampling. A smaller box on" << Endl;
1575 Log() <<
"the other hand increases the sensitivity to statistical fluctuations" << Endl;
1576 Log() <<
"in the training samples, but for sufficiently large training samples" << Endl;
1577 Log() <<
"it will result in a more precise local estimate of the sampled" << Endl;
1578 Log() <<
"density. In general, higher dimensional problems require larger box" << Endl;
1579 Log() <<
"sizes, due to the reduced average number of events per box volume. The" << Endl;
1580 Log() <<
"default value of 0.0666 was optimised for an example with 5" << Endl;
1581 Log() <<
"observables and training samples of the order of 50000 signal and" << Endl;
1582 Log() <<
"background events each." << Endl;
1584 Log() <<
"Furthermore kernel weighting can be activated, which will lead to an" << Endl;
1585 Log() <<
"additional performance improvement. Note that Gauss weighting will" << Endl;
1586 Log() <<
"significantly increase the response time of the method. LinNeighbors" << Endl;
1587 Log() <<
"weighting performs a linear interpolation with direct neighbor cells" << Endl;
1588 Log() <<
"for each dimension and is much faster than Gauss weighting." << Endl;
1590 Log() <<
"The classification results were found to be rather insensitive to the" << Endl;
1591 Log() <<
"values of the parameters \"nSamples\" and \"nBin\"." << Endl;