ClassImp(TMVA::MethodLD);
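
////////////////////////////////////////////////////////////////////////////////
/// standard constructor for the LD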
TMVA::MethodLD::MethodLD( const TString& jobName,
                          const TString& methodTitle,
                          DataSetInfo& dsi,
                          const TString& theOption ) :
   MethodBase( jobName, Types::kLD, methodTitle, dsi, theOption ),
   fNRegOut( 0 ), fSumMatx( 0 ), fSumValMatx( 0 ), fCoeffMatx( 0 ), fLDCoeff( 0 )
{
}
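
////////////////////////////////////////////////////////////////////////////////
/// constructor from weight file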
TMVA::MethodLD::MethodLD( DataSetInfo& theData, const TString& theWeightFile )
   : MethodBase( Types::kLD, theData, theWeightFile ),
     fNRegOut( 0 ), fSumMatx( 0 ), fSumValMatx( 0 ), fCoeffMatx( 0 ), fLDCoeff( 0 )
{
}
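
////////////////////////////////////////////////////////////////////////////////
/// default initialization called by all constructors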
void TMVA::MethodLD::Init( void )
{
   // one output per regression target; a single output for classification
   if (DataInfo().GetNTargets() != 0) fNRegOut = DataInfo().GetNTargets();
   else                               fNRegOut = 1;

   fLDCoeff = new vector< vector< Double_t >* >(fNRegOut);
   for (Int_t iout = 0; iout<fNRegOut; iout++) {
      (*fLDCoeff)[iout] = new std::vector<Double_t>( GetNvar()+1 );
   }

   // the minimum requirement to declare an event signal-like
   SetSignalReferenceCut( 0.0 );
}
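
////////////////////////////////////////////////////////////////////////////////
/// destructor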
TMVA::MethodLD::~MethodLD( void )
{
   if (fSumMatx)    { delete fSumMatx;    fSumMatx    = 0; }
   if (fSumValMatx) { delete fSumValMatx; fSumValMatx = 0; }
   if (fCoeffMatx)  { delete fCoeffMatx;  fCoeffMatx  = 0; }
   if (fLDCoeff) {
      for (vector< vector< Double_t >* >::iterator vi=fLDCoeff->begin(); vi!=fLDCoeff->end(); ++vi) {
         if (*vi) { delete *vi; *vi = 0; }
      }
      delete fLDCoeff; fLDCoeff = 0;
   }
}
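
////////////////////////////////////////////////////////////////////////////////
/// LD can handle classification with 2 classes and regression with one regression-target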
Bool_t TMVA::MethodLD::HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets )
{
   if (type == Types::kClassification && numberClasses == 2) return kTRUE;
   else if (type == Types::kRegression && numberTargets == 1) {
      Log() << "regression with " << numberTargets << " targets." << Endl;
      return kTRUE;
   }
   else return kFALSE;
}
void TMVA::MethodLD::Train( void )
{
   GetSum();            // accumulate transposed(X)*W*X in fSumMatx
   GetSumVal();         // accumulate transposed(X)*W*Y in fSumValMatx
   GetLDCoeff();        // solve for fCoeffMatx and fill fLDCoeff
   PrintCoefficients(); // nice output

   ExitFromTraining();
}
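
////////////////////////////////////////////////////////////////////////////////
/// Returns the MVA classification output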
Double_t TMVA::MethodLD::GetMvaValue( Double_t* err, Double_t* errUpper )
{
   const Event* ev = GetEvent();

   if (fRegressionReturnVal == NULL) fRegressionReturnVal = new vector< Float_t >();
   fRegressionReturnVal->resize( fNRegOut );

   for (Int_t iout = 0; iout<fNRegOut; iout++) {
      (*fRegressionReturnVal)[iout] = (*(*fLDCoeff)[iout])[0];

      int icoeff = 0;
      for (std::vector<Float_t>::const_iterator it = ev->GetValues().begin(); it != ev->GetValues().end(); ++it) {
         (*fRegressionReturnVal)[iout] += (*(*fLDCoeff)[iout])[++icoeff] * (*it);
      }
   }

   // cannot determine error
   NoErrorCalc(err, errUpper);

   return (*fRegressionReturnVal)[0];
}
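
////////////////////////////////////////////////////////////////////////////////
/// Calculates the regression output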
const std::vector< Float_t >& TMVA::MethodLD::GetRegressionValues()
{
   const Event* ev = GetEvent();

   if (fRegressionReturnVal == NULL) fRegressionReturnVal = new vector< Float_t >();
   fRegressionReturnVal->resize( fNRegOut );

   for (Int_t iout = 0; iout<fNRegOut; iout++) {
      (*fRegressionReturnVal)[iout] = (*(*fLDCoeff)[iout])[0];

      int icoeff = 0;
      for (std::vector<Float_t>::const_iterator it = ev->GetValues().begin(); it != ev->GetValues().end(); ++it) {
         (*fRegressionReturnVal)[iout] += (*(*fLDCoeff)[iout])[++icoeff] * (*it);
      }
   }

   // perform inverse transformation on the regression targets
   Event* evT = new Event(*ev);
   for (Int_t iout = 0; iout<fNRegOut; iout++) evT->SetTarget( iout, (*fRegressionReturnVal)[iout] );

   const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
   fRegressionReturnVal->clear();
   for (Int_t iout = 0; iout<fNRegOut; iout++) fRegressionReturnVal->push_back( evT2->GetTarget(iout) );

   delete evT;
   return (*fRegressionReturnVal);
}
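
////////////////////////////////////////////////////////////////////////////////
/// Initialization method; creates global matrices and vectors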
void TMVA::MethodLD::InitMatrices( void )
{
   fSumMatx    = new TMatrixD( GetNvar()+1, GetNvar()+1 );
   fSumValMatx = new TMatrixD( GetNvar()+1, fNRegOut );
   fCoeffMatx  = new TMatrixD( GetNvar()+1, fNRegOut );
}
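
////////////////////////////////////////////////////////////////////////////////
/// Calculates the matrix transposed(X)*W*X with W being the diagonal weight
/// matrix and X the coordinate values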
void TMVA::MethodLD::GetSum( void )
{
   const UInt_t nvar = DataInfo().GetNVariables();

   for (UInt_t ivar = 0; ivar<=nvar; ivar++) {
      for (UInt_t jvar = 0; jvar<=nvar; jvar++) (*fSumMatx)( ivar, jvar ) = 0;
   }

   // compute the weighted sums
   Long64_t nevts = Data()->GetNEvents();
   for (Int_t ievt=0; ievt<nevts; ievt++) {
      const Event * ev = GetEvent(ievt);
      Double_t weight = ev->GetWeight();

      // in case events with negative weights are to be ignored
      if (IgnoreEventsWithNegWeightsInTraining() && weight <= 0) continue;

      // sum of weights
      (*fSumMatx)( 0, 0 ) += weight;

      // sum of coordinates
      for (UInt_t ivar=0; ivar<nvar; ivar++) {
         (*fSumMatx)( ivar+1, 0 ) += ev->GetValue( ivar ) * weight;
         (*fSumMatx)( 0, ivar+1 ) += ev->GetValue( ivar ) * weight;
      }

      // sum of products of coordinates
      for (UInt_t ivar=0; ivar<nvar; ivar++) {
         for (UInt_t jvar=0; jvar<nvar; jvar++) {
            (*fSumMatx)( ivar+1, jvar+1 ) += ev->GetValue( ivar ) * ev->GetValue( jvar ) * weight;
         }
      }
   }
}
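
////////////////////////////////////////////////////////////////////////////////
/// Calculates the vector transposed(X)*W*Y with Y being the target vector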
void TMVA::MethodLD::GetSumVal( void )
{
   const UInt_t nvar = DataInfo().GetNVariables();

   for (Int_t ivar = 0; ivar<fNRegOut; ivar++) {
      for (UInt_t jvar = 0; jvar<=nvar; jvar++) {
         (*fSumValMatx)(jvar,ivar) = 0;
      }
   }

   // sum of coordinates multiplied by values
   for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {

      // retrieve the event
      const Event* ev = GetEvent(ievt);
      Double_t weight = ev->GetWeight();

      // in case events with negative weights are to be ignored
      if (IgnoreEventsWithNegWeightsInTraining() && weight <= 0) continue;

      for (Int_t ivar=0; ivar<fNRegOut; ivar++) {

         Double_t val = weight;

         // the "value" is the class for classification, the target for regression
         if (!DoRegression()) val *= DataInfo().IsSignal(ev);
         else                 val *= ev->GetTarget( ivar );

         (*fSumValMatx)( 0, ivar ) += val;
         for (UInt_t jvar=0; jvar<nvar; jvar++) {
            (*fSumValMatx)(jvar+1,ivar) += ev->GetValue(jvar) * val;
         }
      }
   }
}
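
////////////////////////////////////////////////////////////////////////////////
/// Calculates the coefficients used for classification/regression; for each
/// output this solves the normal equations (X^T W X) c = X^T W Y by inverting
/// fSumMatx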
void TMVA::MethodLD::GetLDCoeff( void )
{
   const UInt_t nvar = DataInfo().GetNVariables();

   for (Int_t ivar = 0; ivar<fNRegOut; ivar++) {
      TMatrixD invSum( *fSumMatx );
      if ( TMath::Abs(invSum.Determinant()) < 10E-24 ) {
         Log() << kWARNING << "<GetCoeff> matrix is almost singular with determinant="
               << TMath::Abs(invSum.Determinant())
               << " did you use variables that are linear combinations or highly correlated?"
               << Endl;
      }
      if ( TMath::Abs(invSum.Determinant()) < 10E-120 ) {
         Log() << kFATAL << "<GetCoeff> matrix is singular with determinant="
               << TMath::Abs(invSum.Determinant())
               << " did you use variables that are linear combinations?"
               << Endl;
      }
      invSum.Invert();

      fCoeffMatx = new TMatrixD( invSum * (*fSumValMatx) );
      for (UInt_t jvar = 0; jvar<nvar+1; jvar++) {
         (*(*fLDCoeff)[ivar])[jvar] = (*fCoeffMatx)( jvar, ivar );
      }
      if (!DoRegression()) {
         (*(*fLDCoeff)[ivar])[0] = 0.0;
         for (UInt_t jvar = 1; jvar<nvar+1; jvar++) {
            (*(*fLDCoeff)[ivar])[0] += (*fCoeffMatx)(jvar,ivar) * (*fSumMatx)(0,jvar) / (*fSumMatx)( 0, 0 );
         }
         (*(*fLDCoeff)[ivar])[0] /= -2.0;
      }
   }
}
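
////////////////////////////////////////////////////////////////////////////////
/// read LD coefficients from weight file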
void TMVA::MethodLD::ReadWeightsFromStream( std::istream& istr )
{
   for (Int_t iout=0; iout<fNRegOut; iout++) {
      for (UInt_t icoeff=0; icoeff<GetNvar()+1; icoeff++) {
         istr >> (*(*fLDCoeff)[iout])[icoeff];
      }
   }
}
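
////////////////////////////////////////////////////////////////////////////////
/// create XML description for LD classification and regression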
void TMVA::MethodLD::AddWeightsXMLTo( void* parent ) const
{
   void* wght = gTools().AddChild(parent, "Weights");
   gTools().AddAttr( wght, "NOut",   fNRegOut    );
   gTools().AddAttr( wght, "NCoeff", GetNvar()+1 );
   for (Int_t iout=0; iout<fNRegOut; iout++) {
      for (UInt_t icoeff=0; icoeff<GetNvar()+1; icoeff++) {
         void* coeffxml = gTools().AddChild( wght, "Coefficient" );
         gTools().AddAttr( coeffxml, "IndexOut",   iout );
         gTools().AddAttr( coeffxml, "IndexCoeff", icoeff );
         gTools().AddAttr( coeffxml, "Value",      (*(*fLDCoeff)[iout])[icoeff] );
      }
   }
}
void TMVA::MethodLD::ReadWeightsFromXML( void* wghtnode )
{
   UInt_t ncoeff;
   gTools().ReadAttr( wghtnode, "NOut",   fNRegOut );
   gTools().ReadAttr( wghtnode, "NCoeff", ncoeff   );

   // sanity check
   if (ncoeff != GetNvar()+1) Log() << kFATAL << "Mismatch in number of output variables/coefficients: "
                                    << ncoeff << " != " << GetNvar()+1 << Endl;

   // create vector with coefficients (double vector due to arbitrary output dimension)
   if (fLDCoeff) {
      for (vector< vector< Double_t >* >::iterator vi=fLDCoeff->begin(); vi!=fLDCoeff->end(); ++vi) {
         if (*vi) { delete *vi; *vi = 0; }
      }
      delete fLDCoeff; fLDCoeff = 0;
   }
   fLDCoeff = new vector< vector< Double_t >* >(fNRegOut);
   for (Int_t ivar = 0; ivar<fNRegOut; ivar++) (*fLDCoeff)[ivar] = new std::vector<Double_t>( ncoeff );

   // read the coefficients
   void* ch = gTools().GetChild(wghtnode);
   Double_t coeff;
   Int_t    iout, icoeff;
   while (ch) {
      gTools().ReadAttr( ch, "IndexOut",   iout   );
      gTools().ReadAttr( ch, "IndexCoeff", icoeff );
      gTools().ReadAttr( ch, "Value",      coeff  );

      (*(*fLDCoeff)[iout])[icoeff] = coeff;

      ch = gTools().GetNextChild(ch);
   }
}
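
////////////////////////////////////////////////////////////////////////////////
/// write LD-specific classifier response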
void TMVA::MethodLD::MakeClassSpecific( std::ostream& fout, const TString& className ) const
{
   fout << "   std::vector<double> fLDCoefficients;" << std::endl;
   fout << "};" << std::endl;
   fout << "" << std::endl;
   fout << "inline void " << className << "::Initialize() " << std::endl;
   fout << "{" << std::endl;
   for (UInt_t ivar=0; ivar<GetNvar()+1; ivar++) {
      Int_t dp = fout.precision();
      fout << "   fLDCoefficients.push_back( "
           << std::setprecision(12) << (*(*fLDCoeff)[0])[ivar]
           << std::setprecision(dp) << " );" << std::endl;
   }
   fout << std::endl;
   fout << "   // sanity check" << std::endl;
   fout << "   if (fLDCoefficients.size() != fNvars+1) {" << std::endl;
   fout << "      std::cout << \"Problem in class \\\"\" << fClassName << \"\\\"::Initialize: mismatch in number of input values\"" << std::endl;
   fout << "                << fLDCoefficients.size() << \" != \" << fNvars+1 << std::endl;" << std::endl;
   fout << "      fStatusIsClean = false;" << std::endl;
   fout << "   }" << std::endl;
   fout << "}" << std::endl;
   fout << std::endl;
   fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
   fout << "{" << std::endl;
   fout << "   double retval = fLDCoefficients[0];" << std::endl;
   fout << "   for (size_t ivar = 1; ivar < fNvars+1; ivar++) {" << std::endl;
   fout << "      retval += fLDCoefficients[ivar]*inputValues[ivar-1];" << std::endl;
   fout << "   }" << std::endl;
   fout << std::endl;
   fout << "   return retval;" << std::endl;
   fout << "}" << std::endl;
   fout << std::endl;
   fout << "// Clean up" << std::endl;
   fout << "inline void " << className << "::Clear() " << std::endl;
   fout << "{" << std::endl;
   fout << "   // clear coefficients" << std::endl;
   fout << "   fLDCoefficients.clear(); " << std::endl;
   fout << "}" << std::endl;
}
const TMVA::Ranking* TMVA::MethodLD::CreateRanking()
{
   // create the ranking object
   fRanking = new Ranking( GetName(), "Discr. power" );

   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      fRanking->AddRank( Rank( GetInputLabel(ivar), TMath::Abs( (*(*fLDCoeff)[0])[ivar+1] ) ) );
   }

   return fRanking;
}
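
////////////////////////////////////////////////////////////////////////////////
/// MethodLD options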
void TMVA::MethodLD::DeclareOptions()
{
   AddPreDefVal(TString("LD"));
}
void TMVA::MethodLD::ProcessOptions()
{
   if (HasTrainingTree()) InitMatrices();
}
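
////////////////////////////////////////////////////////////////////////////////
/// Display the classification/regression coefficients for each variable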
void TMVA::MethodLD::PrintCoefficients( void )
{
   Log() << kHEADER << "Results for LD coefficients:" << Endl;

   if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
      Log() << kINFO << "NOTE: The coefficients must be applied to TRANSFORMED variables" << Endl;
      Log() << kINFO << "  List of the transformations: " << Endl;
      TListIter trIt(&GetTransformationHandler().GetTransformationList());
      while (VariableTransformBase *trf = (VariableTransformBase*) trIt()) {
         Log() << kINFO << "  -- " << trf->GetName() << Endl;
      }
   }
   std::vector<TString>  vars;
   std::vector<Double_t> coeffs;
   for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
      vars  .push_back( GetInputLabel(ivar) );
      coeffs.push_back( (*(*fLDCoeff)[0])[ivar+1] );
   }
   vars  .push_back( "(offset)" );
   coeffs.push_back( (*(*fLDCoeff)[0])[0] );
   TMVA::gTools().FormattedOutput( coeffs, vars, "Variable", "Coefficient", Log() );

   if (IsNormalised()) {
      Log() << kINFO << "NOTE: You have chosen to use the \"Normalise\" booking option. Hence, the" << Endl;
      Log() << kINFO << "      coefficients must be applied to NORMALISED (') variables as follows:" << Endl;
      Int_t maxL = 0;
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
         if (GetInputLabel(ivar).Length() > maxL) maxL = GetInputLabel(ivar).Length();

      // print the normalisation expression (see Tools::NormVariable)
      for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
         Log() << kINFO
               << std::setw(maxL+9) << TString("[") + GetInputLabel(ivar) + "]' = 2*("
               << std::setw(maxL+2) << TString("[") + GetInputLabel(ivar) + "]"
               << std::setw(3)      << (GetXmin(ivar) > 0 ? " - " : " + ")
               << std::setw(6)      << TMath::Abs(GetXmin(ivar)) << std::setw(3) << ")/"
               << std::setw(6)      << (GetXmax(ivar) - GetXmin(ivar))
               << std::setw(3)      << " - 1"
               << Endl;
      }
      Log() << kINFO << "The TMVA Reader will properly account for this normalisation, but if the" << Endl;
      Log() << kINFO << "LD classifier is applied outside the Reader, the transformation must be" << Endl;
      Log() << kINFO << "implemented -- or the \"Normalise\" option is removed and LD retrained." << Endl;
      Log() << kINFO << Endl;
   }
}
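
////////////////////////////////////////////////////////////////////////////////
/// get help message text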
void TMVA::MethodLD::GetHelpMessage() const
{
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "Linear discriminants select events by distinguishing the mean " << Endl;
   Log() << "values of the signal and background distributions in a trans- " << Endl;
   Log() << "formed variable space where linear correlations are removed." << Endl;
   Log() << "The LD implementation here is equivalent to the \"Fisher\" discriminant" << Endl;
   Log() << "for classification, but also provides linear regression." << Endl;
   Log() << Endl;
   Log() << "   (More precisely: the \"linear discriminator\" determines" << Endl;
   Log() << "   an axis in the (correlated) hyperspace of the input " << Endl;
   Log() << "   variables such that, when projecting the output classes " << Endl;
   Log() << "   (signal and background) upon this axis, they are pushed " << Endl;
   Log() << "   as far as possible away from each other, while events" << Endl;
   Log() << "   of a same class are confined in a close vicinity. The " << Endl;
   Log() << "   linearity property of this classifier is reflected in the " << Endl;
   Log() << "   metric with which \"far apart\" and \"close vicinity\" are " << Endl;
   Log() << "   determined: the covariance matrix of the discriminating" << Endl;
   Log() << "   variable space.)" << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "Optimal performance for the linear discriminant is obtained for " << Endl;
   Log() << "linearly correlated Gaussian-distributed variables. Any deviation" << Endl;
   Log() << "from this ideal reduces the achievable separation power. In " << Endl;
   Log() << "particular, no discrimination at all is achieved for a variable" << Endl;
   Log() << "that has the same sample mean for signal and background, even if " << Endl;
   Log() << "the shapes of the distributions are very different. Thus, the linear " << Endl;
   Log() << "discriminant often benefits from a suitable transformation of the " << Endl;
   Log() << "input variables. For example, if a variable x in [-1,1] has a " << Endl;
   Log() << "parabolic signal distribution and a uniform background" << Endl;
   Log() << "distribution, its mean value is zero in both cases, leading " << Endl;
   Log() << "to no separation. The simple transformation x -> |x| renders this " << Endl;
   Log() << "variable powerful for use in a linear discriminant." << Endl;
   Log() << Endl;
   Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
   Log() << Endl;
   Log() << "<None>" << Endl;
}