224 ClassImp(TPrincipal);
229 TPrincipal::TPrincipal()
232 fCovarianceMatrix(1,1),
240 fIsNormalised = kFALSE;
241 fNumberOfDataPoints = 0;
242 fNumberOfVariables = 0;
253 TPrincipal::TPrincipal(Int_t nVariables, Option_t *opt)
254 : fMeanValues(nVariables),
256 fCovarianceMatrix(nVariables,nVariables),
257 fEigenVectors(nVariables,nVariables),
258 fEigenValues(nVariables),
259 fOffDiagonal(nVariables),
262 if (nVariables <= 1) {
263 Error(
"TPrincipal",
"You can't be serious - nVariables == 1!!!");
267 SetName(
"principal");
271 fIsNormalised = kFALSE;
272 fNumberOfDataPoints = 0;
273 fNumberOfVariables = nVariables;
274 while (strlen(opt) > 0) {
278 fIsNormalised = kTRUE;
289 if (!fMeanValues.IsValid())
290 Error(
"TPrincipal",
"Couldn't create vector mean values");
291 if (!fSigmas.IsValid())
292 Error(
"TPrincipal",
"Couldn't create vector sigmas");
293 if (!fCovarianceMatrix.IsValid())
294 Error(
"TPrincipal",
"Couldn't create covariance matrix");
295 if (!fEigenVectors.IsValid())
296 Error(
"TPrincipal",
"Couldn't create eigenvector matrix");
297 if (!fEigenValues.IsValid())
298 Error(
"TPrincipal",
"Couldn't create eigenvalue vector");
299 if (!fOffDiagonal.IsValid())
300 Error(
"TPrincipal",
"Couldn't create offdiagonal vector");
302 fUserData.ResizeTo(nVariables*1000);
304 if (!fUserData.IsValid())
305 Error(
"TPrincipal",
"Couldn't create user data vector");
312 TPrincipal::TPrincipal(
const TPrincipal& pr) :
314 fNumberOfDataPoints(pr.fNumberOfDataPoints),
315 fNumberOfVariables(pr.fNumberOfVariables),
316 fMeanValues(pr.fMeanValues),
318 fCovarianceMatrix(pr.fCovarianceMatrix),
319 fEigenVectors(pr.fEigenVectors),
320 fEigenValues(pr.fEigenValues),
321 fOffDiagonal(pr.fOffDiagonal),
322 fUserData(pr.fUserData),
324 fHistograms(pr.fHistograms),
325 fIsNormalised(pr.fIsNormalised),
326 fStoreData(pr.fStoreData)
333 TPrincipal& TPrincipal::operator=(
const TPrincipal& pr)
336 TNamed::operator=(pr);
337 fNumberOfDataPoints=pr.fNumberOfDataPoints;
338 fNumberOfVariables=pr.fNumberOfVariables;
339 fMeanValues=pr.fMeanValues;
341 fCovarianceMatrix=pr.fCovarianceMatrix;
342 fEigenVectors=pr.fEigenVectors;
343 fEigenValues=pr.fEigenValues;
344 fOffDiagonal=pr.fOffDiagonal;
345 fUserData=pr.fUserData;
347 fHistograms=pr.fHistograms;
348 fIsNormalised=pr.fIsNormalised;
349 fStoreData=pr.fStoreData;
357 TPrincipal::~TPrincipal()
360 fHistograms->Delete();
410 void TPrincipal::AddRow(
const Double_t *p)
417 if (++fNumberOfDataPoints == 1) {
418 for (i = 0; i < fNumberOfVariables; i++)
419 fMeanValues(i) = p[i];
423 Double_t cor = 1 - 1./Double_t(fNumberOfDataPoints);
424 for (i = 0; i < fNumberOfVariables; i++) {
426 fMeanValues(i) *= cor;
427 fMeanValues(i) += p[i] / Double_t(fNumberOfDataPoints);
428 Double_t t1 = (p[i] - fMeanValues(i)) / (fNumberOfDataPoints - 1);
431 for (j = 0; j < i + 1; j++) {
432 fCovarianceMatrix(i,j) *= cor;
433 fCovarianceMatrix(i,j) += t1 * (p[j] - fMeanValues(j));
443 Int_t size = fUserData.GetNrows();
444 if (fNumberOfDataPoints * fNumberOfVariables > size)
445 fUserData.ResizeTo(size + size/2);
447 for (i = 0; i < fNumberOfVariables; i++) {
448 j = (fNumberOfDataPoints-1) * fNumberOfVariables + i;
457 void TPrincipal::Browse(TBrowser *b)
460 TIter next(fHistograms);
462 while ((h = (TH1*)next()))
463 b->Add(h,h->GetName());
467 b->Add(&fUserData,
"User Data");
468 b->Add(&fCovarianceMatrix,
"Covariance Matrix");
469 b->Add(&fMeanValues,
"Mean value vector");
470 b->Add(&fSigmas,
"Sigma value vector");
471 b->Add(&fEigenValues,
"Eigenvalue vector");
472 b->Add(&fEigenVectors,
"Eigenvector Matrix");
480 void TPrincipal::Clear(Option_t *opt)
483 fHistograms->Delete(opt);
486 fNumberOfDataPoints = 0;
488 fCovarianceMatrix.Zero();
489 fEigenVectors.Zero();
496 fUserData.ResizeTo(fNumberOfVariables * 1000);
507 const Double_t *TPrincipal::GetRow(Int_t row)
509 if (row >= fNumberOfDataPoints)
515 Int_t index = row * fNumberOfVariables;
516 return &fUserData(index);
544 void TPrincipal::MakeCode(
const char *filename, Option_t *opt)
546 TString outName(filename);
547 if (!outName.EndsWith(
".C") && !outName.EndsWith(
".cxx"))
550 MakeRealCode(outName.Data(),
"",opt);
569 void TPrincipal::MakeHistograms(
const char *name, Option_t *opt)
571 Bool_t makeX = kFALSE;
572 Bool_t makeD = kFALSE;
573 Bool_t makeP = kFALSE;
574 Bool_t makeE = kFALSE;
575 Bool_t makeS = kFALSE;
577 Int_t len = strlen(opt);
579 for (i = 0; i < len; i++) {
606 Warning(
"MakeHistograms",
"Unknown option: %c",opt[i]);
611 if (!makeX && !makeD && !makeP && !makeE && !makeS)
616 fHistograms =
new TList;
619 if (makeX && fHistograms->FindObject(Form(
"%s_x000",name)))
621 if (makeD && fHistograms->FindObject(Form(
"%s_d000",name)))
623 if (makeP && fHistograms->FindObject(Form(
"%s_p000",name)))
625 if (makeE && fHistograms->FindObject(Form(
"%s_e",name)))
627 if (makeS && fHistograms->FindObject(Form(
"%s_s",name)))
638 hX =
new TH1F * [fNumberOfVariables];
641 hD =
new TH2F * [fNumberOfVariables];
644 hP =
new TH1F * [fNumberOfVariables];
647 hE =
new TH1F(Form(
"%s_e",name),
"Eigenvalues of Covariance matrix",
648 fNumberOfVariables,0,fNumberOfVariables);
649 hE->SetXTitle(
"Eigenvalue");
650 fHistograms->Add(hE);
654 hS =
new TH1F(Form(
"%s_s",name),
"E_{N}",
655 fNumberOfVariables-1,1,fNumberOfVariables);
657 hS->SetYTitle(
"#sum_{i=1}^{M} (x_{i} - x'_{N,i})^{2}");
658 fHistograms->Add(hS);
662 for (i = 0; i < fNumberOfVariables; i++) {
666 Double_t xlowb = fMeanValues(i) - 4 * fSigmas(i);
667 Double_t xhighb = fMeanValues(i) + 4 * fSigmas(i);
668 Int_t xbins = fNumberOfDataPoints/100;
669 hX[i] =
new TH1F(Form(
"%s_x%03d", name, i),
670 Form(
"Pattern space, variable %d", i),
672 hX[i]->SetXTitle(Form(
"x_{%d}",i));
673 fHistograms->Add(hX[i]);
679 Double_t dhighb = 20;
680 Int_t dbins = fNumberOfDataPoints/100;
681 hD[i] =
new TH2F(Form(
"%s_d%03d", name, i),
682 Form(
"Distance from pattern to "
683 "feature space, variable %d", i),
685 fNumberOfVariables-1,
688 hD[i]->SetXTitle(Form(
"|x_{%d} - x'_{%d,N}|/#sigma_{%d}",i,i,i));
689 hD[i]->SetYTitle(
"N");
690 fHistograms->Add(hD[i]);
697 Double_t et = TMath::Abs(fEigenValues(i) * fTrace);
698 Double_t plowb = -10 * TMath::Sqrt(et);
699 Double_t phighb = -plowb;
701 hP[i] =
new TH1F(Form(
"%s_p%03d", name, i),
702 Form(
"Feature space, variable %d", i),
704 hP[i]->SetXTitle(Form(
"p_{%d}",i));
705 fHistograms->Add(hP[i]);
710 hE->Fill(i,fEigenValues(i));
713 if (!makeX && !makeP && !makeD && !makeS) {
724 Double_t *p =
new Double_t[fNumberOfVariables];
725 Double_t *d =
new Double_t[fNumberOfVariables];
726 for (i = 0; i < fNumberOfDataPoints; i++) {
729 for (j = 0; j < fNumberOfVariables; j++)
733 x = (Double_t*)(GetRow(i));
736 if (makeP||makeD||makeS)
740 if (makeD || makeS) {
744 for (j = fNumberOfVariables; j > 0; j--) {
747 for (k = 0; k < fNumberOfVariables; k++) {
752 hS->Fill(j,d[k]*d[k]);
755 d[k] = TMath::Abs(d[k]) / (fIsNormalised ? fSigmas(k) : 1);
756 (hD[k])->Fill(d[k],j);
765 for (j = 0; j < fNumberOfVariables; j++) {
788 hS->Scale(Double_t(1.)/fNumberOfDataPoints);
794 void TPrincipal::MakeNormalised()
797 for (i = 0; i < fNumberOfVariables; i++) {
798 fSigmas(i) = TMath::Sqrt(fCovarianceMatrix(i,i));
800 for (j = 0; j <= i; j++)
801 fCovarianceMatrix(i,j) /= (fSigmas(i) * fSigmas(j));
803 fTrace += fCovarianceMatrix(i,i);
807 for (i = 0; i < fNumberOfVariables; i++)
808 for (j = 0; j <= i; j++) {
809 fCovarianceMatrix(i,j) /= fTrace;
810 fCovarianceMatrix(j,i) = fCovarianceMatrix(i,j);
856 void TPrincipal::MakeMethods(
const char *classname, Option_t *opt)
859 MakeRealCode(Form(
"%sPCA.cxx", classname), classname, opt);
869 void TPrincipal::MakePrincipals()
874 TMatrixDSym sym; sym.Use(fCovarianceMatrix.GetNrows(),fCovarianceMatrix.GetMatrixArray());
875 TMatrixDSymEigen eigen(sym);
876 fEigenVectors = eigen.GetEigenVectors();
877 fEigenValues = eigen.GetEigenValues();
879 for (Int_t i = 0; i < fNumberOfVariables; i++) {
880 if (fEigenValues[i] < 0) fEigenValues[i] = -fEigenValues[i];
891 void TPrincipal::MakeRealCode(
const char *filename,
const char *classname,
894 Bool_t isMethod = (classname[0] ==
'\0' ? kFALSE : kTRUE);
895 const char *prefix = (isMethod ? Form(
"%s::", classname) :
"");
896 const char *cv_qual = (isMethod ?
"" :
"static ");
898 std::ofstream outFile(filename,std::ios::out|std::ios::trunc);
900 Error(
"MakeRealCode",
"couldn't open output file '%s'",filename);
904 std::cout <<
"Writing on file \"" << filename <<
"\" ... " << std::flush;
909 outFile <<
"// -*- mode: c++ -*-" << std::endl;
911 outFile <<
"// " << std::endl
912 <<
"// File " << filename
913 <<
" generated by TPrincipal::MakeCode" << std::endl;
916 outFile <<
"// on " << date.AsString() << std::endl;
918 outFile <<
"// ROOT version " << gROOT->GetVersion()
919 << std::endl <<
"//" << std::endl;
921 outFile <<
"// This file contains the functions " << std::endl
923 <<
"// void " << prefix
924 <<
"X2P(Double_t *x, Double_t *p); " << std::endl
925 <<
"// void " << prefix
926 <<
"P2X(Double_t *p, Double_t *x, Int_t nTest);"
927 << std::endl <<
"//" << std::endl
928 <<
"// The first for transforming original data x in " << std::endl
929 <<
"// pattern space, to principal components p in " << std::endl
930 <<
"// feature space. The second function is for the" << std::endl
931 <<
"// inverse transformation, but using only nTest" << std::endl
932 <<
"// of the principal components in the expansion" << std::endl
933 <<
"// " << std::endl
934 <<
"// See TPrincipal class documentation for more "
935 <<
"information " << std::endl <<
"// " << std::endl;
937 outFile <<
"#ifndef __CINT__" << std::endl;
940 outFile <<
"#include \"" << classname <<
".h\"" << std::endl;
943 outFile <<
"#include <Rtypes.h> // needed for Double_t etc" << std::endl;
945 outFile <<
"#endif" << std::endl << std::endl;
954 outFile <<
"//" << std::endl
955 <<
"// Static data variables" << std::endl
956 <<
"//" << std::endl;
957 outFile << cv_qual <<
"Int_t " << prefix <<
"gNVariables = "
958 << fNumberOfVariables <<
";" << std::endl;
964 outFile << std::endl <<
"// Assignment of eigenvector matrix." << std::endl
965 <<
"// Elements are stored row-wise, that is" << std::endl
966 <<
"// M[i][j] = e[i * nVariables + j] " << std::endl
967 <<
"// where i and j are zero-based" << std::endl;
968 outFile << cv_qual <<
"Double_t " << prefix
969 <<
"gEigenVectors[] = {" << std::flush;
971 for (i = 0; i < fNumberOfVariables; i++) {
972 for (j = 0; j < fNumberOfVariables; j++) {
973 Int_t index = i * fNumberOfVariables + j;
974 outFile << (index != 0 ?
"," :
"" ) << std::endl
975 <<
" " << fEigenVectors(i,j) << std::flush;
978 outFile <<
"};" << std::endl << std::endl;
981 outFile <<
"// Assignment to eigen value vector. Zero-based." << std::endl;
982 outFile << cv_qual <<
"Double_t " << prefix
983 <<
"gEigenValues[] = {" << std::flush;
984 for (i = 0; i < fNumberOfVariables; i++)
985 outFile << (i != 0 ?
"," :
"") << std::endl
986 <<
" " << fEigenValues(i) << std::flush;
987 outFile << std::endl <<
"};" << std::endl << std::endl;
990 outFile <<
"// Assignment to mean value vector. Zero-based." << std::endl;
991 outFile << cv_qual <<
"Double_t " << prefix
992 <<
"gMeanValues[] = {" << std::flush;
993 for (i = 0; i < fNumberOfVariables; i++)
994 outFile << (i != 0 ?
"," :
"") << std::endl
995 <<
" " << fMeanValues(i) << std::flush;
996 outFile << std::endl <<
"};" << std::endl << std::endl;
999 outFile <<
"// Assignment to sigma value vector. Zero-based." << std::endl;
1000 outFile << cv_qual <<
"Double_t " << prefix
1001 <<
"gSigmaValues[] = {" << std::flush;
1002 for (i = 0; i < fNumberOfVariables; i++)
1003 outFile << (i != 0 ?
"," :
"") << std::endl
1004 <<
" " << (fIsNormalised ? fSigmas(i) : 1) << std::flush;
1006 outFile << std::endl <<
"};" << std::endl << std::endl;
1013 outFile <<
"// " << std::endl
1015 << (isMethod ?
"method " :
"function ")
1016 <<
" void " << prefix
1017 <<
"X2P(Double_t *x, Double_t *p)"
1018 << std::endl <<
"// " << std::endl;
1019 outFile <<
"void " << prefix
1020 <<
"X2P(Double_t *x, Double_t *p) {" << std::endl
1021 <<
" for (Int_t i = 0; i < gNVariables; i++) {" << std::endl
1022 <<
" p[i] = 0;" << std::endl
1023 <<
" for (Int_t j = 0; j < gNVariables; j++)" << std::endl
1024 <<
" p[i] += (x[j] - gMeanValues[j]) " << std::endl
1025 <<
" * gEigenVectors[j * gNVariables + i] "
1026 <<
"/ gSigmaValues[j];" << std::endl << std::endl <<
" }"
1027 << std::endl <<
"}" << std::endl << std::endl;
1031 outFile <<
"// " << std::endl <<
"// The "
1032 << (isMethod ?
"method " :
"function ")
1033 <<
" void " << prefix
1034 <<
"P2X(Double_t *p, Double_t *x, Int_t nTest)"
1035 << std::endl <<
"// " << std::endl;
1036 outFile <<
"void " << prefix
1037 <<
"P2X(Double_t *p, Double_t *x, Int_t nTest) {" << std::endl
1038 <<
" for (Int_t i = 0; i < gNVariables; i++) {" << std::endl
1039 <<
" x[i] = gMeanValues[i];" << std::endl
1040 <<
" for (Int_t j = 0; j < nTest; j++)" << std::endl
1041 <<
" x[i] += p[j] * gSigmaValues[i] " << std::endl
1042 <<
" * gEigenVectors[i * gNVariables + j];" << std::endl
1043 <<
" }" << std::endl <<
"}" << std::endl << std::endl;
1046 outFile <<
"// EOF for " << filename << std::endl;
1051 std::cout <<
"done" << std::endl;
1060 void TPrincipal::P2X(
const Double_t *p, Double_t *x, Int_t nTest)
1062 for (Int_t i = 0; i < fNumberOfVariables; i++){
1063 x[i] = fMeanValues(i);
1064 for (Int_t j = 0; j < nTest; j++)
1065 x[i] += p[j] * (fIsNormalised ? fSigmas(i) : 1)
1066 * fEigenVectors(i,j);
1080 void TPrincipal::Print(Option_t *opt)
const
1082 Bool_t printV = kFALSE;
1083 Bool_t printM = kFALSE;
1084 Bool_t printS = kFALSE;
1085 Bool_t printE = kFALSE;
1087 Int_t len = strlen(opt);
1088 for (Int_t i = 0; i < len; i++) {
1107 Warning(
"Print",
"Unknown option '%c'",opt[i]);
1112 if (printM||printS||printE) {
1113 std::cout <<
" Variable # " << std::flush;
1115 std::cout <<
"| Mean Value " << std::flush;
1117 std::cout <<
"| Sigma " << std::flush;
1119 std::cout <<
"| Eigenvalue" << std::flush;
1120 std::cout << std::endl;
1122 std::cout <<
"-------------" << std::flush;
1124 std::cout <<
"+------------" << std::flush;
1126 std::cout <<
"+------------" << std::flush;
1128 std::cout <<
"+------------" << std::flush;
1129 std::cout << std::endl;
1131 for (Int_t i = 0; i < fNumberOfVariables; i++) {
1132 std::cout << std::setw(12) << i <<
" " << std::flush;
1134 std::cout <<
"| " << std::setw(10) << std::setprecision(4)
1135 << fMeanValues(i) <<
" " << std::flush;
1137 std::cout <<
"| " << std::setw(10) << std::setprecision(4)
1138 << fSigmas(i) <<
" " << std::flush;
1140 std::cout <<
"| " << std::setw(10) << std::setprecision(4)
1141 << fEigenValues(i) <<
" " << std::flush;
1142 std::cout << std::endl;
1144 std::cout << std::endl;
1148 for (Int_t i = 0; i < fNumberOfVariables; i++) {
1149 std::cout <<
"Eigenvector # " << i << std::flush;
1150 TVectorD v(fNumberOfVariables);
1151 v = TMatrixDColumn_const(fEigenVectors,i);
1169 void TPrincipal::SumOfSquareResiduals(
const Double_t *x, Double_t *s)
1179 for (Int_t i = fNumberOfVariables-1; i >= 0; i--) {
1181 for (Int_t j = 0; j < fNumberOfVariables; j++) {
1182 s[i] += (x[j] - xp[j])*(x[j] - xp[j]);
1191 void TPrincipal::Test(Option_t *)
1193 MakeHistograms(
"pca",
"S");
1199 if (fHistograms) pca_s = (TH1*)fHistograms->FindObject(
"pca_s");
1201 Warning(
"Test",
"Couldn't get histogram of square residuals");
1215 void TPrincipal::X2P(
const Double_t *x, Double_t *p)
1217 for (Int_t i = 0; i < fNumberOfVariables; i++){
1219 for (Int_t j = 0; j < fNumberOfVariables; j++)
1220 p[i] += (x[j] - fMeanValues(j)) * fEigenVectors(j,i) /
1221 (fIsNormalised ? fSigmas(j) : 1);