139 REGISTER_METHOD(Likelihood)
141 ClassImp(TMVA::MethodLikelihood);
146 TMVA::MethodLikelihood::MethodLikelihood( const TString& jobName,
147 const TString& methodTitle,
148 DataSetInfo& theData,
149 const TString& theOption ) :
150 TMVA::MethodBase( jobName, Types::kLikelihood, methodTitle, theData, theOption),
151 fEpsilon ( 1.e3 * DBL_MIN ),
152 fTransformLikelihoodOutput( kFALSE ),
156 fHistSig_smooth( 0 ),
157 fHistBgd_smooth( 0 ),
158 fDefaultPDFLik ( 0 ),
164 fAverageEvtPerBin( 0 ),
165 fAverageEvtPerBinVarS (0),
166 fAverageEvtPerBinVarB (0),
167 fKDEfineFactor ( 0 ),
168 fInterpolateString(0)
175 TMVA::MethodLikelihood::MethodLikelihood( DataSetInfo& theData,
176 const TString& theWeightFile) :
177 TMVA::MethodBase( Types::kLikelihood, theData, theWeightFile),
178 fEpsilon ( 1.e3 * DBL_MIN ),
179 fTransformLikelihoodOutput( kFALSE ),
183 fHistSig_smooth( 0 ),
184 fHistBgd_smooth( 0 ),
185 fDefaultPDFLik ( 0 ),
191 fAverageEvtPerBin( 0 ),
192 fAverageEvtPerBinVarS (0),
193 fAverageEvtPerBinVarB (0),
194 fKDEfineFactor ( 0 ),
195 fInterpolateString(0)
202 TMVA::MethodLikelihood::~MethodLikelihood(
void )
204 if (NULL != fDefaultPDFLik)
delete fDefaultPDFLik;
205 if (NULL != fHistSig)
delete fHistSig;
206 if (NULL != fHistBgd)
delete fHistBgd;
207 if (NULL != fHistSig_smooth)
delete fHistSig_smooth;
208 if (NULL != fHistBgd_smooth)
delete fHistBgd_smooth;
209 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
210 if ((*fPDFSig)[ivar] !=0)
delete (*fPDFSig)[ivar];
211 if ((*fPDFBgd)[ivar] !=0)
delete (*fPDFBgd)[ivar];
213 if (NULL != fPDFSig)
delete fPDFSig;
214 if (NULL != fPDFBgd)
delete fPDFBgd;
220 Bool_t TMVA::MethodLikelihood::HasAnalysisType( Types::EAnalysisType type,
221 UInt_t numberClasses, UInt_t )
223 if (type == Types::kClassification && numberClasses == 2)
return kTRUE;
230 void TMVA::MethodLikelihood::Init(
void )
234 fHistSig =
new std::vector<TH1*> ( GetNvar(), (TH1*)0 );
235 fHistBgd =
new std::vector<TH1*> ( GetNvar(), (TH1*)0 );
236 fHistSig_smooth =
new std::vector<TH1*> ( GetNvar(), (TH1*)0 );
237 fHistBgd_smooth =
new std::vector<TH1*> ( GetNvar(), (TH1*)0 );
238 fPDFSig =
new std::vector<TMVA::PDF*>( GetNvar(), (TMVA::PDF*)0 );
239 fPDFBgd =
new std::vector<TMVA::PDF*>( GetNvar(), (TMVA::PDF*)0 );
247 void TMVA::MethodLikelihood::DeclareOptions()
249 DeclareOptionRef( fTransformLikelihoodOutput = kFALSE,
"TransformOutput",
250 "Transform likelihood output by inverse sigmoid function" );
255 TString updatedOptions = GetOptions();
256 fDefaultPDFLik =
new PDF( TString(GetName()) +
" PDF", updatedOptions );
257 fDefaultPDFLik->DeclareOptions();
258 fDefaultPDFLik->ParseOptions();
259 updatedOptions = fDefaultPDFLik->GetOptions();
260 for (UInt_t ivar = 0; ivar< DataInfo().GetNVariables(); ivar++) {
261 (*fPDFSig)[ivar] =
new PDF( Form(
"%s PDF Sig[%d]", GetName(), ivar), updatedOptions,
262 Form(
"Sig[%d]",ivar), fDefaultPDFLik );
263 (*fPDFSig)[ivar]->DeclareOptions();
264 (*fPDFSig)[ivar]->ParseOptions();
265 updatedOptions = (*fPDFSig)[ivar]->GetOptions();
266 (*fPDFBgd)[ivar] =
new PDF( Form(
"%s PDF Bkg[%d]", GetName(), ivar), updatedOptions,
267 Form(
"Bkg[%d]",ivar), fDefaultPDFLik );
268 (*fPDFBgd)[ivar]->DeclareOptions();
269 (*fPDFBgd)[ivar]->ParseOptions();
270 updatedOptions = (*fPDFBgd)[ivar]->GetOptions();
274 SetOptions( updatedOptions );
278 void TMVA::MethodLikelihood::DeclareCompatibilityOptions()
282 MethodBase::DeclareCompatibilityOptions();
283 DeclareOptionRef( fNsmooth = 1,
"NSmooth",
284 "Number of smoothing iterations for the input histograms");
285 DeclareOptionRef( fAverageEvtPerBin = 50,
"NAvEvtPerBin",
286 "Average number of events per PDF bin");
287 DeclareOptionRef( fKDEfineFactor =1. ,
"KDEFineFactor",
288 "Fine tuning factor for Adaptive KDE: Factor to multiply the width of the kernel");
289 DeclareOptionRef( fBorderMethodString =
"None",
"KDEborder",
290 "Border effects treatment (1=no treatment , 2=kernel renormalization, 3=sample mirroring)" );
291 DeclareOptionRef( fKDEiterString =
"Nonadaptive",
"KDEiter",
292 "Number of iterations (1=non-adaptive, 2=adaptive)" );
293 DeclareOptionRef( fKDEtypeString =
"Gauss",
"KDEtype",
294 "KDE kernel type (1=Gauss)" );
295 fAverageEvtPerBinVarS =
new Int_t[GetNvar()];
296 fAverageEvtPerBinVarB =
new Int_t[GetNvar()];
297 fNsmoothVarS =
new Int_t[GetNvar()];
298 fNsmoothVarB =
new Int_t[GetNvar()];
299 fInterpolateString =
new TString[GetNvar()];
300 for(UInt_t i=0; i<GetNvar(); ++i) {
301 fAverageEvtPerBinVarS[i] = fAverageEvtPerBinVarB[i] = 0;
302 fNsmoothVarS[i] = fNsmoothVarB[i] = 0;
303 fInterpolateString[i] =
"";
305 DeclareOptionRef( fAverageEvtPerBinVarS, GetNvar(),
"NAvEvtPerBinSig",
306 "Average num of events per PDF bin and variable (signal)");
307 DeclareOptionRef( fAverageEvtPerBinVarB, GetNvar(),
"NAvEvtPerBinBkg",
308 "Average num of events per PDF bin and variable (background)");
309 DeclareOptionRef(fNsmoothVarS, GetNvar(),
"NSmoothSig",
310 "Number of smoothing iterations for the input histograms");
311 DeclareOptionRef(fNsmoothVarB, GetNvar(),
"NSmoothBkg",
312 "Number of smoothing iterations for the input histograms");
313 DeclareOptionRef(fInterpolateString, GetNvar(),
"PDFInterpol",
"Method of interpolating reference histograms (e.g. Spline2 or KDE)");
320 void TMVA::MethodLikelihood::ProcessOptions()
322 SetSignalReferenceCut( TransformLikelihoodOutput( 0.5, 0.5 ) );
324 fDefaultPDFLik->ProcessOptions();
325 for (UInt_t ivar = 0; ivar< DataInfo().GetNVariables(); ivar++) {
326 (*fPDFBgd)[ivar]->ProcessOptions();
327 (*fPDFSig)[ivar]->ProcessOptions();
339 void TMVA::MethodLikelihood::Train(
void )
341 UInt_t nvar=GetNvar();
342 std::vector<Double_t> xmin(nvar), xmax(nvar);
343 for (UInt_t ivar=0; ivar<nvar; ivar++) {xmin[ivar]=1e30; xmax[ivar]=-1e30;}
345 UInt_t nevents=Data()->GetNEvents();
346 for (UInt_t ievt=0; ievt<nevents; ievt++) {
349 const Event* origEv = Data()->GetEvent(ievt);
350 if (IgnoreEventsWithNegWeightsInTraining() && origEv->GetWeight()<=0)
continue;
352 for (
int cls=0;cls<2;cls++){
353 GetTransformationHandler().SetTransformationReferenceClass(cls);
354 const Event* ev = GetTransformationHandler().Transform( origEv );
355 for (UInt_t ivar=0; ivar<nvar; ivar++) {
356 Float_t value = ev->GetValue(ivar);
357 if (value < xmin[ivar]) xmin[ivar] = value;
358 if (value > xmax[ivar]) xmax[ivar] = value;
365 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
366 TString var = (*fInputVars)[ivar];
373 if (DataInfo().GetVariableInfo(ivar).GetVarType() ==
'I') {
375 Int_t ixmin = TMath::Nint( xmin[ivar] );
376 xmax[ivar]=xmax[ivar]+1;
377 Int_t ixmax = TMath::Nint( xmax[ivar] );
378 Int_t nbins = ixmax - ixmin;
379 (*fHistSig)[ivar] =
new TH1F(GetMethodName()+
"_"+var +
"_sig", var +
" signal training", nbins, ixmin, ixmax );
380 (*fHistBgd)[ivar] =
new TH1F(GetMethodName()+
"_"+var +
"_bgd", var +
" background training", nbins, ixmin, ixmax );
383 UInt_t minNEvt = TMath::Min(Data()->GetNEvtSigTrain(),Data()->GetNEvtBkgdTrain());
384 Int_t nbinsS = (*fPDFSig)[ivar]->GetHistNBins( minNEvt );
385 Int_t nbinsB = (*fPDFBgd)[ivar]->GetHistNBins( minNEvt );
387 (*fHistSig)[ivar] =
new TH1F( Form(
"%s_%s_%s_sig",DataInfo().GetName(),GetMethodName().Data(),var.Data()),
388 Form(
"%s_%s_%s signal training",DataInfo().GetName(),GetMethodName().Data(),var.Data()), nbinsS, xmin[ivar], xmax[ivar] );
389 (*fHistBgd)[ivar] =
new TH1F( Form(
"%s_%s_%s_bgd",DataInfo().GetName(),GetMethodName().Data(),var.Data()),
390 Form(
"%s_%s_%s background training",DataInfo().GetName(),GetMethodName().Data(),var.Data()), nbinsB, xmin[ivar], xmax[ivar] );
395 Log() << kINFO <<
"Filling reference histograms" << Endl;
398 for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
402 const Event* origEv = Data()->GetEvent(ievt);
403 if (IgnoreEventsWithNegWeightsInTraining() && origEv->GetWeight()<=0)
continue;
404 GetTransformationHandler().SetTransformationReferenceClass( origEv->GetClass() );
405 const Event* ev = GetTransformationHandler().Transform( origEv );
408 Float_t weight = ev->GetWeight();
411 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
412 Double_t value = ev->GetValue(ivar);
414 if (value >= xmax[ivar]) value = xmax[ivar] - 1.0e-10;
415 else if (value < xmin[ivar]) value = xmin[ivar] + 1.0e-10;
417 if (value >=(*fHistSig)[ivar]->GetXaxis()->GetXmax() ||
418 value <(*fHistSig)[ivar]->GetXaxis()->GetXmin()){
420 <<
"error in filling likelihood reference histograms var="
421 <<(*fInputVars)[ivar]
422 <<
", xmin="<<(*fHistSig)[ivar]->GetXaxis()->GetXmin()
424 <<
", xmax="<<(*fHistSig)[ivar]->GetXaxis()->GetXmax()
427 if (DataInfo().IsSignal(ev)) (*fHistSig)[ivar]->Fill( value, weight );
428 else (*fHistBgd)[ivar]->Fill( value, weight );
433 Log() << kINFO <<
"Building PDF out of reference histograms" << Endl;
434 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
438 (*fPDFSig)[ivar]->BuildPDF( (*fHistSig)[ivar] );
439 (*fPDFBgd)[ivar]->BuildPDF( (*fHistBgd)[ivar] );
441 (*fPDFSig)[ivar]->ValidatePDF( (*fHistSig)[ivar] );
442 (*fPDFBgd)[ivar]->ValidatePDF( (*fHistBgd)[ivar] );
445 if ((*fPDFSig)[ivar]->GetSmoothedHist() != 0) (*fHistSig_smooth)[ivar] = (*fPDFSig)[ivar]->GetSmoothedHist();
446 if ((*fPDFBgd)[ivar]->GetSmoothedHist() != 0) (*fHistBgd_smooth)[ivar] = (*fPDFBgd)[ivar]->GetSmoothedHist();
455 Double_t TMVA::MethodLikelihood::GetMvaValue( Double_t* err, Double_t* errUpper )
460 NoErrorCalc(err, errUpper);
463 TVector vs( GetNvar() );
464 TVector vb( GetNvar() );
469 GetTransformationHandler().SetTransformationReferenceClass( fSignalClass );
472 const Event* ev = GetEvent();
473 for (ivar=0; ivar<GetNvar(); ivar++) vs(ivar) = ev->GetValue(ivar);
475 GetTransformationHandler().SetTransformationReferenceClass( fBackgroundClass );
479 for (ivar=0; ivar<GetNvar(); ivar++) vb(ivar) = ev->GetValue(ivar);
482 Double_t ps(1), pb(1), p(0);
483 for (ivar=0; ivar<GetNvar(); ivar++) {
486 if ((Int_t)ivar == fDropVariable)
continue;
488 Double_t x[2] = { vs(ivar), vb(ivar) };
490 for (UInt_t itype=0; itype < 2; itype++) {
493 if (x[itype] >= (*fPDFSig)[ivar]->GetXmax()) x[itype] = (*fPDFSig)[ivar]->GetXmax() - 1.0e-10;
494 else if (x[itype] < (*fPDFSig)[ivar]->GetXmin()) x[itype] = (*fPDFSig)[ivar]->GetXmin();
497 PDF* pdf = (itype == 0) ? (*fPDFSig)[ivar] : (*fPDFBgd)[ivar];
498 if (pdf == 0) Log() << kFATAL <<
"<GetMvaValue> Reference histograms don't exist" << Endl;
499 TH1* hist = pdf->GetPDFHist();
503 Int_t bin = hist->FindBin(x[itype]);
507 if ((*fPDFSig)[ivar]->GetInterpolMethod() == TMVA::PDF::kSpline0 ||
508 DataInfo().GetVariableInfo(ivar).GetVarType() ==
'N') {
509 p = TMath::Max( hist->GetBinContent(bin), fEpsilon );
512 if ((x[itype] > hist->GetBinCenter(bin) && bin != hist->GetNbinsX()) || bin == 1)
518 Double_t dx = hist->GetBinCenter(bin) - hist->GetBinCenter(nextbin);
519 Double_t dy = hist->GetBinContent(bin) - hist->GetBinContent(nextbin);
520 Double_t like = hist->GetBinContent(bin) + (x[itype] - hist->GetBinCenter(bin)) * dy/dx;
522 p = TMath::Max( like, fEpsilon );
525 if (itype == 0) ps *= p;
531 return TransformLikelihoodOutput( ps, pb );
537 Double_t TMVA::MethodLikelihood::TransformLikelihoodOutput( Double_t ps, Double_t pb )
const
539 if (ps < fEpsilon) ps = fEpsilon;
540 if (pb < fEpsilon) pb = fEpsilon;
541 Double_t r = ps/(ps + pb);
542 if (r >= 1.0) r = 1. - 1.e-15;
544 if (fTransformLikelihoodOutput) {
548 if (r <= 0.0) r = fEpsilon;
549 else if (r >= 1.0) r = 1. - 1.e-15;
552 r = - TMath::Log(1.0/r - 1.0)/tau;
561 void TMVA::MethodLikelihood::WriteOptionsToStream( std::ostream& o,
const TString& prefix )
const
563 Configurable::WriteOptionsToStream( o, prefix);
566 if (fDefaultPDFLik != 0) {
567 o << prefix << std::endl << prefix <<
"#Default Likelihood PDF Options:" << std::endl << prefix << std::endl;
568 fDefaultPDFLik->WriteOptionsToStream( o, prefix );
570 for (UInt_t ivar = 0; ivar < fPDFSig->size(); ivar++) {
571 if ((*fPDFSig)[ivar] != 0) {
572 o << prefix << std::endl << prefix << Form(
"#Signal[%d] Likelihood PDF Options:",ivar) << std::endl << prefix << std::endl;
573 (*fPDFSig)[ivar]->WriteOptionsToStream( o, prefix );
575 if ((*fPDFBgd)[ivar] != 0) {
576 o << prefix << std::endl << prefix <<
"#Background[%d] Likelihood PDF Options:" << std::endl << prefix << std::endl;
577 (*fPDFBgd)[ivar]->WriteOptionsToStream( o, prefix );
585 void TMVA::MethodLikelihood::AddWeightsXMLTo(
void* parent )
const
587 void* wght = gTools().AddChild(parent,
"Weights");
588 gTools().AddAttr(wght,
"NVariables", GetNvar());
589 gTools().AddAttr(wght,
"NClasses", 2);
591 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
592 if ( (*fPDFSig)[ivar]==0 || (*fPDFBgd)[ivar]==0 )
593 Log() << kFATAL <<
"Reference histograms for variable " << ivar
594 <<
" don't exist, can't write it to weight file" << Endl;
595 pdfwrap = gTools().AddChild(wght,
"PDFDescriptor");
596 gTools().AddAttr(pdfwrap,
"VarIndex", ivar);
597 gTools().AddAttr(pdfwrap,
"ClassIndex", 0);
598 (*fPDFSig)[ivar]->AddXMLTo(pdfwrap);
599 pdfwrap = gTools().AddChild(wght,
"PDFDescriptor");
600 gTools().AddAttr(pdfwrap,
"VarIndex", ivar);
601 gTools().AddAttr(pdfwrap,
"ClassIndex", 1);
602 (*fPDFBgd)[ivar]->AddXMLTo(pdfwrap);
609 const TMVA::Ranking* TMVA::MethodLikelihood::CreateRanking()
612 if (fRanking)
delete fRanking;
613 fRanking =
new Ranking( GetName(),
"Delta Separation" );
615 Double_t sepRef = -1, sep = -1;
616 for (Int_t ivar=-1; ivar<(Int_t)GetNvar(); ivar++) {
619 fDropVariable = ivar;
621 TString nameS = Form(
"rS_%i", ivar+1 );
622 TString nameB = Form(
"rB_%i", ivar+1 );
623 TH1* rS =
new TH1F( nameS, nameS, 80, 0, 1 );
624 TH1* rB =
new TH1F( nameB, nameB, 80, 0, 1 );
627 for (Int_t ievt=0; ievt<Data()->GetNTrainingEvents(); ievt++) {
629 const Event* origEv = Data()->GetEvent(ievt);
630 GetTransformationHandler().SetTransformationReferenceClass( origEv->GetClass() );
631 const Event* ev = GetTransformationHandler().Transform(Data()->GetEvent(ievt));
633 Double_t lk = this->GetMvaValue();
634 Double_t w = ev->GetWeight();
635 if (DataInfo().IsSignal(ev)) rS->Fill( lk, w );
636 else rB->Fill( lk, w );
640 sep = TMVA::gTools().GetSeparation( rS, rB );
641 if (ivar == -1) sepRef = sep;
648 if (ivar >= 0) fRanking->AddRank( Rank( DataInfo().GetVariableInfo(ivar).GetInternalName(), sep ) );
659 void TMVA::MethodLikelihood::WriteWeightsToStream( TFile& )
const
661 TString pname =
"PDF_";
662 for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
663 (*fPDFSig)[ivar]->Write( pname + GetInputVar( ivar ) +
"_S" );
664 (*fPDFBgd)[ivar]->Write( pname + GetInputVar( ivar ) +
"_B" );
671 void TMVA::MethodLikelihood::ReadWeightsFromXML(
void* wghtnode)
673 TString pname =
"PDF_";
674 Bool_t addDirStatus = TH1::AddDirectoryStatus();
675 TH1::AddDirectory(0);
677 gTools().ReadAttr(wghtnode,
"NVariables",nvars);
678 void* descnode = gTools().GetChild(wghtnode);
679 for (UInt_t ivar=0; ivar<nvars; ivar++){
680 void* pdfnode = gTools().GetChild(descnode);
681 Log() << kDEBUG <<
"Reading signal and background PDF for variable: " << GetInputVar( ivar ) << Endl;
682 if ((*fPDFSig)[ivar] !=0)
delete (*fPDFSig)[ivar];
683 if ((*fPDFBgd)[ivar] !=0)
delete (*fPDFBgd)[ivar];
684 (*fPDFSig)[ivar] =
new PDF( GetInputVar( ivar ) +
" PDF Sig" );
685 (*fPDFBgd)[ivar] =
new PDF( GetInputVar( ivar ) +
" PDF Bkg" );
686 (*fPDFSig)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
687 (*fPDFBgd)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
688 (*(*fPDFSig)[ivar]).ReadXML(pdfnode);
689 descnode = gTools().GetNextChild(descnode);
690 pdfnode = gTools().GetChild(descnode);
691 (*(*fPDFBgd)[ivar]).ReadXML(pdfnode);
692 descnode = gTools().GetNextChild(descnode);
694 TH1::AddDirectory(addDirStatus);
701 void TMVA::MethodLikelihood::ReadWeightsFromStream( std::istream & istr )
703 TString pname =
"PDF_";
704 Bool_t addDirStatus = TH1::AddDirectoryStatus();
705 TH1::AddDirectory(0);
706 for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
707 Log() << kDEBUG <<
"Reading signal and background PDF for variable: " << GetInputVar( ivar ) << Endl;
708 if ((*fPDFSig)[ivar] !=0)
delete (*fPDFSig)[ivar];
709 if ((*fPDFBgd)[ivar] !=0)
delete (*fPDFBgd)[ivar];
710 (*fPDFSig)[ivar] =
new PDF(GetInputVar( ivar ) +
" PDF Sig" );
711 (*fPDFBgd)[ivar] =
new PDF(GetInputVar( ivar ) +
" PDF Bkg");
712 (*fPDFSig)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
713 (*fPDFBgd)[ivar]->SetReadingVersion( GetTrainingTMVAVersionCode() );
714 istr >> *(*fPDFSig)[ivar];
715 istr >> *(*fPDFBgd)[ivar];
717 TH1::AddDirectory(addDirStatus);
723 void TMVA::MethodLikelihood::ReadWeightsFromStream( TFile& rf )
725 TString pname =
"PDF_";
726 Bool_t addDirStatus = TH1::AddDirectoryStatus();
727 TH1::AddDirectory(0);
728 for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
729 (*fPDFSig)[ivar] = (TMVA::PDF*)rf.Get( Form(
"PDF_%s_S", GetInputVar( ivar ).Data() ) );
730 (*fPDFBgd)[ivar] = (TMVA::PDF*)rf.Get( Form(
"PDF_%s_B", GetInputVar( ivar ).Data() ) );
732 TH1::AddDirectory(addDirStatus);
738 void TMVA::MethodLikelihood::WriteMonitoringHistosToFile(
void )
const
740 Log() << kINFO <<
"Write monitoring histograms to file: " << BaseDir()->GetPath() << Endl;
743 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
744 (*fHistSig)[ivar]->Write();
745 (*fHistBgd)[ivar]->Write();
746 if ((*fHistSig_smooth)[ivar] != 0) (*fHistSig_smooth)[ivar]->Write();
747 if ((*fHistBgd_smooth)[ivar] != 0) (*fHistBgd_smooth)[ivar]->Write();
748 (*fPDFSig)[ivar]->GetPDFHist()->Write();
749 (*fPDFBgd)[ivar]->GetPDFHist()->Write();
751 if ((*fPDFSig)[ivar]->GetNSmoothHist() != 0) (*fPDFSig)[ivar]->GetNSmoothHist()->Write();
752 if ((*fPDFBgd)[ivar]->GetNSmoothHist() != 0) (*fPDFBgd)[ivar]->GetNSmoothHist()->Write();
755 Float_t xmin=((*fPDFSig)[ivar]->GetPDFHist()->GetXaxis())->GetXmin();
756 Float_t xmax=((*fPDFSig)[ivar]->GetPDFHist()->GetXaxis())->GetXmax();
757 TH1F* mm =
new TH1F( (*fInputVars)[ivar]+
"_additional_check",
758 (*fInputVars)[ivar]+
"_additional_check", 15000, xmin, xmax );
759 Double_t intBin = (xmax-xmin)/15000;
760 for (Int_t bin=0; bin < 15000; bin++) {
761 Double_t x = (bin + 0.5)*intBin + xmin;
762 mm->SetBinContent(bin+1 ,(*fPDFSig)[ivar]->GetVal(x));
767 TH1* h[2] = { (*fHistSig)[ivar], (*fHistBgd)[ivar] };
768 for (UInt_t i=0; i<2; i++) {
769 TH1* hclone = (TH1F*)h[i]->Clone( TString(h[i]->GetName()) +
"_nice" );
770 hclone->SetName ( TString(h[i]->GetName()) +
"_nice" );
771 hclone->SetTitle( TString(h[i]->GetTitle()) +
"" );
772 if (hclone->GetNbinsX() > 100) {
774 hclone->Rebin( resFactor );
775 hclone->Scale( 1.0/resFactor );
786 void TMVA::MethodLikelihood::MakeClassSpecificHeader( std::ostream& fout,
const TString& )
const
788 fout <<
"#include <math.h>" << std::endl;
789 fout <<
"#include <cstdlib>" << std::endl;
795 void TMVA::MethodLikelihood::MakeClassSpecific( std::ostream& fout,
const TString& className )
const
797 Int_t dp = fout.precision();
798 fout <<
" double fEpsilon;" << std::endl;
800 Int_t * nbin =
new Int_t[GetNvar()];
803 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
804 nbin[ivar]=(*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX();
805 if (nbin[ivar] > nbinMax) nbinMax=nbin[ivar];
808 fout <<
" static float fRefS[][" << nbinMax <<
"]; "
809 <<
"// signal reference vector [nvars][max_nbins]" << std::endl;
810 fout <<
" static float fRefB[][" << nbinMax <<
"]; "
811 <<
"// backgr reference vector [nvars][max_nbins]" << std::endl << std::endl;
812 fout <<
"// if a variable has its PDF encoded as a spline0 --> treat it like an Integer valued one" <<std::endl;
813 fout <<
" bool fHasDiscretPDF[" << GetNvar() <<
"]; "<< std::endl;
814 fout <<
" int fNbin[" << GetNvar() <<
"]; "
815 <<
"// number of bins (discrete variables may have less bins)" << std::endl;
816 fout <<
" double fHistMin[" << GetNvar() <<
"]; " << std::endl;
817 fout <<
" double fHistMax[" << GetNvar() <<
"]; " << std::endl;
819 fout <<
" double TransformLikelihoodOutput( double, double ) const;" << std::endl;
820 fout <<
"};" << std::endl;
821 fout <<
"" << std::endl;
822 fout <<
"inline void " << className <<
"::Initialize() " << std::endl;
823 fout <<
"{" << std::endl;
824 fout <<
" fEpsilon = " << fEpsilon <<
";" << std::endl;
825 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
826 fout <<
" fNbin[" << ivar <<
"] = " << (*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() <<
";" << std::endl;
827 fout <<
" fHistMin[" << ivar <<
"] = " << (*fPDFSig)[ivar]->GetPDFHist()->GetXaxis()->GetXmin() <<
";" << std::endl;
828 fout <<
" fHistMax[" << ivar <<
"] = " << (*fPDFSig)[ivar]->GetPDFHist()->GetXaxis()->GetXmax() <<
";" << std::endl;
830 if ((((*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() != nbin[ivar] ||
831 (*fPDFBgd)[ivar]->GetPDFHist()->GetNbinsX() != nbin[ivar])
833 (*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() != (*fPDFBgd)[ivar]->GetPDFHist()->GetNbinsX()) {
834 Log() << kFATAL <<
"<MakeClassSpecific> Mismatch in binning of variable "
835 <<
"\"" << GetOriginalVarName(ivar) <<
"\" of type: \'" << DataInfo().GetVariableInfo(ivar).GetVarType()
837 <<
"nxS = " << (*fPDFSig)[ivar]->GetPDFHist()->GetNbinsX() <<
", "
838 <<
"nxB = " << (*fPDFBgd)[ivar]->GetPDFHist()->GetNbinsX()
839 <<
" while we expect " << nbin[ivar]
843 for (UInt_t ivar=0; ivar<GetNvar(); ivar++){
844 if ((*fPDFSig)[ivar]->GetInterpolMethod() == TMVA::PDF::kSpline0)
845 fout <<
" fHasDiscretPDF[" << ivar <<
"] = true; " << std::endl;
847 fout <<
" fHasDiscretPDF[" << ivar <<
"] = false; " << std::endl;
850 fout <<
"}" << std::endl << std::endl;
852 fout <<
"inline double " << className
853 <<
"::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
854 fout <<
"{" << std::endl;
855 fout <<
" double ps(1), pb(1);" << std::endl;
856 fout <<
" std::vector<double> inputValuesSig = inputValues;" << std::endl;
857 fout <<
" std::vector<double> inputValuesBgd = inputValues;" << std::endl;
858 if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
859 fout <<
" Transform(inputValuesSig,0);" << std::endl;
860 fout <<
" Transform(inputValuesBgd,1);" << std::endl;
862 fout <<
" for (size_t ivar = 0; ivar < GetNvar(); ivar++) {" << std::endl;
864 fout <<
" // dummy at present... will be used for variable transforms" << std::endl;
865 fout <<
" double x[2] = { inputValuesSig[ivar], inputValuesBgd[ivar] };" << std::endl;
867 fout <<
" for (int itype=0; itype < 2; itype++) {" << std::endl;
869 fout <<
" // interpolate linearly between adjacent bins" << std::endl;
870 fout <<
" // this is not useful for discrete variables (or forced Spline0)" << std::endl;
871 fout <<
" int bin = int((x[itype] - fHistMin[ivar])/(fHistMax[ivar] - fHistMin[ivar])*fNbin[ivar]) + 0;" << std::endl;
873 fout <<
" // since the test data sample is in general different from the training sample" << std::endl;
874 fout <<
" // it can happen that the min/max of the training sample are trespassed --> correct this" << std::endl;
875 fout <<
" if (bin < 0) {" << std::endl;
876 fout <<
" bin = 0;" << std::endl;
877 fout <<
" x[itype] = fHistMin[ivar];" << std::endl;
878 fout <<
" }" << std::endl;
879 fout <<
" else if (bin >= fNbin[ivar]) {" << std::endl;
880 fout <<
" bin = fNbin[ivar]-1;" << std::endl;
881 fout <<
" x[itype] = fHistMax[ivar];" << std::endl;
882 fout <<
" }" << std::endl;
884 fout <<
" // find corresponding histogram from cached indices" << std::endl;
885 fout <<
" float ref = (itype == 0) ? fRefS[ivar][bin] : fRefB[ivar][bin];" << std::endl;
887 fout <<
" // sanity check" << std::endl;
888 fout <<
" if (ref < 0) {" << std::endl;
889 fout <<
" std::cout << \"Fatal error in " << className
890 <<
": bin entry < 0 ==> abort\" << std::endl;" << std::endl;
891 fout <<
" std::exit(1);" << std::endl;
892 fout <<
" }" << std::endl;
894 fout <<
" double p = ref;" << std::endl;
896 fout <<
" if (GetType(ivar) != 'I' && !fHasDiscretPDF[ivar]) {" << std::endl;
897 fout <<
" float bincenter = (bin + 0.5)/fNbin[ivar]*(fHistMax[ivar] - fHistMin[ivar]) + fHistMin[ivar];" << std::endl;
898 fout <<
" int nextbin = bin;" << std::endl;
899 fout <<
" if ((x[itype] > bincenter && bin != fNbin[ivar]-1) || bin == 0) " << std::endl;
900 fout <<
" nextbin++;" << std::endl;
901 fout <<
" else" << std::endl;
902 fout <<
" nextbin--; " << std::endl;
904 fout <<
" double refnext = (itype == 0) ? fRefS[ivar][nextbin] : fRefB[ivar][nextbin];" << std::endl;
905 fout <<
" float nextbincenter = (nextbin + 0.5)/fNbin[ivar]*(fHistMax[ivar] - fHistMin[ivar]) + fHistMin[ivar];" << std::endl;
907 fout <<
" double dx = bincenter - nextbincenter;" << std::endl;
908 fout <<
" double dy = ref - refnext;" << std::endl;
909 fout <<
" p += (x[itype] - bincenter) * dy/dx;" << std::endl;
910 fout <<
" }" << std::endl;
912 fout <<
" if (p < fEpsilon) p = fEpsilon; // avoid zero response" << std::endl;
914 fout <<
" if (itype == 0) ps *= p;" << std::endl;
915 fout <<
" else pb *= p;" << std::endl;
916 fout <<
" } " << std::endl;
917 fout <<
" } " << std::endl;
919 fout <<
" // the likelihood ratio (transform it ?)" << std::endl;
920 fout <<
" return TransformLikelihoodOutput( ps, pb ); " << std::endl;
921 fout <<
"}" << std::endl << std::endl;
923 fout <<
"inline double " << className <<
"::TransformLikelihoodOutput( double ps, double pb ) const" << std::endl;
924 fout <<
"{" << std::endl;
925 fout <<
" // returns transformed or non-transformed output" << std::endl;
926 fout <<
" if (ps < fEpsilon) ps = fEpsilon;" << std::endl;
927 fout <<
" if (pb < fEpsilon) pb = fEpsilon;" << std::endl;
928 fout <<
" double r = ps/(ps + pb);" << std::endl;
929 fout <<
" if (r >= 1.0) r = 1. - 1.e-15;" << std::endl;
931 fout <<
" if (" << (fTransformLikelihoodOutput ?
"true" :
"false") <<
") {" << std::endl;
932 fout <<
" // inverse Fermi function" << std::endl;
934 fout <<
" // sanity check" << std::endl;
935 fout <<
" if (r <= 0.0) r = fEpsilon;" << std::endl;
936 fout <<
" else if (r >= 1.0) r = 1. - 1.e-15;" << std::endl;
938 fout <<
" double tau = 15.0;" << std::endl;
939 fout <<
" r = - log(1.0/r - 1.0)/tau;" << std::endl;
940 fout <<
" }" << std::endl;
942 fout <<
" return r;" << std::endl;
943 fout <<
"}" << std::endl;
946 fout <<
"// Clean up" << std::endl;
947 fout <<
"inline void " << className <<
"::Clear() " << std::endl;
948 fout <<
"{" << std::endl;
949 fout <<
" // nothing to clear" << std::endl;
950 fout <<
"}" << std::endl << std::endl;
952 fout <<
"// signal map" << std::endl;
953 fout <<
"float " << className <<
"::fRefS[][" << nbinMax <<
"] = " << std::endl;
954 fout <<
"{ " << std::endl;
955 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
957 for (Int_t ibin=1; ibin<=nbinMax; ibin++) {
958 if (ibin-1 < nbin[ivar])
959 fout << (*fPDFSig)[ivar]->GetPDFHist()->GetBinContent(ibin);
963 if (ibin < nbinMax) fout <<
", ";
965 fout <<
" }, " << std::endl;
967 fout <<
"}; " << std::endl;
970 fout <<
"// background map" << std::endl;
971 fout <<
"float " << className <<
"::fRefB[][" << nbinMax <<
"] = " << std::endl;
972 fout <<
"{ " << std::endl;
973 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
975 fout << std::setprecision(8);
976 for (Int_t ibin=1; ibin<=nbinMax; ibin++) {
977 if (ibin-1 < nbin[ivar])
978 fout << (*fPDFBgd)[ivar]->GetPDFHist()->GetBinContent(ibin);
982 if (ibin < nbinMax) fout <<
", ";
984 fout <<
" }, " << std::endl;
986 fout <<
"}; " << std::endl;
988 fout << std::setprecision(dp);
999 void TMVA::MethodLikelihood::GetHelpMessage()
const
1002 Log() << gTools().Color(
"bold") <<
"--- Short description:" << gTools().Color(
"reset") << Endl;
1004 Log() <<
"The maximum-likelihood classifier models the data with probability " << Endl;
1005 Log() <<
"density functions (PDF) reproducing the signal and background" << Endl;
1006 Log() <<
"distributions of the input variables. Correlations among the " << Endl;
1007 Log() <<
"variables are ignored." << Endl;
1009 Log() << gTools().Color(
"bold") <<
"--- Performance optimisation:" << gTools().Color(
"reset") << Endl;
1011 Log() <<
"Required for good performance are decorrelated input variables" << Endl;
1012 Log() <<
"(PCA transformation via the option \"VarTransform=Decorrelate\"" << Endl;
1013 Log() <<
"may be tried). Irreducible non-linear correlations may be reduced" << Endl;
1014 Log() <<
"by precombining strongly correlated input variables, or by simply" << Endl;
1015 Log() <<
"removing one of the variables." << Endl;
1017 Log() << gTools().Color(
"bold") <<
"--- Performance tuning via configuration options:" << gTools().Color(
"reset") << Endl;
1019 Log() <<
"High fidelity PDF estimates are mandatory, i.e., sufficient training " << Endl;
1020 Log() <<
"statistics is required to populate the tails of the distributions" << Endl;
1021 Log() <<
"It would be a surprise if the default Spline or KDE kernel parameters" << Endl;
1022 Log() <<
"provide a satisfying fit to the data. The user is advised to properly" << Endl;
1023 Log() <<
"tune the events per bin and smooth options in the spline cases" << Endl;
1024 Log() <<
"individually per variable. If the KDE kernel is used, the adaptive" << Endl;
1025 Log() <<
"Gaussian kernel may lead to artefacts, so please always also try" << Endl;
1026 Log() <<
"the non-adaptive one." << Endl;
1027 Log() <<
"" << Endl;
1028 Log() <<
"All tuning parameters must be adjusted individually for each input" << Endl;
1029 Log() <<
"variable!" << Endl;