Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
rf611_weightedfits.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_roofit
3 /// \notebook -js
4 /// Likelihood and minimization: Parameter uncertainties for weighted unbinned ML fits
5 ///
6 /// \macro_image
7 /// \macro_output
8 /// \macro_code
9 /// \author 11/2019 - Christoph Langenbruch
10 
11 #include "TH1D.h"
12 #include "TCanvas.h"
13 #include "TROOT.h"
14 #include "TStyle.h"
15 #include "TRandom3.h"
16 #include "TLegend.h"
17 #include "RooRealVar.h"
18 #include "RooFitResult.h"
19 #include "RooDataSet.h"
20 #include "RooPolynomial.h"
21 
22 using namespace RooFit;
23 
24 
25 int rf611_weightedfits(int acceptancemodel=2) {
26  // P a r a m e t e r u n c e r t a i n t i e s f o r w e i g h t e d u n b i n n e d M L f i t s
27  // ---------------------------------------------------------------------------------------------------------
28  //
29  //Based on example from https://arxiv.org/abs/1911.01303
30  //
31  //This example compares different approaches to determining parameter uncertainties in weighted unbinned maximum likelihood fits.
32  //Performing a weighted unbinned maximum likelihood fits can be useful to account for acceptance effects and to statistically subtract background events using the sPlot formalism.
33  //It is however well known that the inverse Hessian matrix does not yield parameter uncertainties with correct coverage in the presence of event weights.
34  //Three approaches to the determination of parameter uncertainties are compared in this example:
35  //
36  //1. Using the inverse weighted Hessian matrix [SumW2Error(false)]
37  //
38  //2. Using the expression [SumW2Error(true)]
39  // V_{ij} = H_{ik}^{-1} C_{kl} H_{lj}^{-1}
40  // where H is the weighted Hessian matrix and C is the Hessian matrix with squared weights
41  //
42  //3. The asymptotically correct approach (for details please see https://arxiv.org/abs/1911.01303) [Asymptotic(true)]
43  // V_{ij} = H_{ik}^{-1} D_{kl} H_{lj}^{-1}
44  // where H is the weighted Hessian matrix and D is given by
45  // D_{kl} = sum_{e=1}^{N} w_e^2 \frac{\partial log P}{\partial lambda_k}\frac{\partial log P}{\partial lambda_l}
46  // with the event weight w_e
47  //
48  //The example performs the fit of a second order polynomial in the angle cos(theta) [-1,1] to a weighted data set.
49  //The polynomial is given by
50  // P = (1 + c0*cos(theta) + c1*cos(theta)*cos(theta)) / Norm
51  //The two coefficients c0 and c1 and their uncertainties are to be determined in the fit.
52  //
53  //The per-event weight is used to correct for an acceptance effect, two different acceptance models can be studied:
54  // acceptancemodel==1: eff = 0.3+0.7*cos(theta)*cos(theta)
55  // acceptancemodel==2: eff = 1.0-0.7*cos(theta)*cos(theta)
56  //The data is generated to be flat before the acceptance effect.
57  //
58  //The performance of the different approaches to determine parameter uncertainties is compared using the pull distributions from a large number of pseudoexperiments.
59  //The pull is defined as (lambda_i-\lambda_{gen})/\sigma(\lambda_i), where \lambda_i is the fitted parameter and \sigma(\lambda_i) its uncertainty for pseudoexperiment number i.
60  //If the fit is unbiased and the parameter uncertainties are estimated correctly, the pull distribution should be a Gaussian centered around zero with a width of one.
61 
62 
63  // I n i t i a l i s a t i o n a n d S e t u p
64  //------------------------------------------------
65 
66  //plotting options
67  gStyle->SetPaintTextFormat(".1f");
68  gStyle->SetEndErrorSize(6.0);
69  gStyle->SetTitleSize(0.05, "XY");
70  gStyle->SetLabelSize(0.05, "XY");
71  gStyle->SetTitleOffset(0.9, "XY");
72  gStyle->SetTextSize(0.05);
73  gStyle->SetPadLeftMargin(0.125);
74  gStyle->SetPadBottomMargin(0.125);
75  gStyle->SetPadTopMargin(0.075);
76  gStyle->SetPadRightMargin(0.075);
77  gStyle->SetMarkerStyle(20);
78  gStyle->SetMarkerSize(1.0);
79  gStyle->SetHistLineWidth(2.0);
80  gStyle->SetHistLineColor(1);
81 
82  //initialise TRandom3
83  TRandom3* rnd = new TRandom3();
84  rnd->SetSeed(191101303);
85 
86  //accepted events and events weighted to account for the acceptance
87  TH1D* haccepted = new TH1D("haccepted", "Generated events;cos(#theta);#events", 40, -1.0, 1.0);
88  TH1D* hweighted = new TH1D("hweighted", "Generated events;cos(#theta);#events", 40, -1.0, 1.0);
89  //histograms holding pull distributions
90  //using the inverse Hessian matrix
91  TH1D* hc0pull1 = new TH1D("hc0pull1", "Inverse weighted Hessian matrix [SumW2Error(false)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);
92  TH1D* hc1pull1 = new TH1D("hc1pull1", "Inverse weighted Hessian matrix [SumW2Error(false)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);
93  //using the correction with the Hessian matrix with squared weights
94  TH1D* hc0pull2 = new TH1D("hc0pull2", "Hessian matrix with squared weights [SumW2Error(true)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);
95  TH1D* hc1pull2 = new TH1D("hc1pull2", "Hessian matrix with squared weights [SumW2Error(true)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);
96  //asymptotically correct approach
97  TH1D* hc0pull3 = new TH1D("hc0pull3", "Asymptotically correct approach [Asymptotic(true)];Pull (c_{0}^{fit}-c_{0}^{gen})/#sigma(c_{0});", 20, -5.0, 5.0);
98  TH1D* hc1pull3 = new TH1D("hc1pull3", "Asymptotically correct approach [Asymptotic(true)];Pull (c_{1}^{fit}-c_{1}^{gen})/#sigma(c_{1});", 20, -5.0, 5.0);
99 
100  //number of pseudoexperiments (toys) and number of events per pseudoexperiment
101  unsigned int ntoys = 500;
102  unsigned int nstats = 5000;
103  //parameters used in the generation
104  double c0gen = 0.0;
105  double c1gen = 0.0;
106 
107  // M a i n l o o p o v e r a l l p s e u d o e x p e r i m e n t s
108  //------------------------------------------------------------------------
109  for (unsigned int i=0; i<ntoys; i++)
110  {
111  //S e t u p p a r a m e t e r s a n d P D F
112  //-----------------------------------------------
113  //angle theta and the weight to account for the acceptance effect
114  RooRealVar costheta("costheta","costheta", -1.0, 1.0);
115  RooRealVar weight("weight","weight", 0.0, 1000.0);
116 
117  //initialise parameters to fit
118  RooRealVar c0("c0","c0", c0gen, -1.0, 1.0);
119  RooRealVar c1("c1","c1", c1gen, -1.0, 1.0);
120  c0.setError(0.01);
121  c1.setError(0.01);
122  //create simple second order polynomial as probability density function
123  RooPolynomial pol("pol", "pol", costheta, RooArgList(c0, c1), 1);
124 
125  //G e n e r a t e d a t a s e t f o r p s e u d o e x p e r i m e n t i
126  //-------------------------------------------------------------------------------
127  RooDataSet data("data","data",RooArgSet(costheta, weight), WeightVar("weight"));
128  //generate nstats events
129  for (unsigned int j=0; j<nstats; j++)
130  {
131  bool finished = false;
132  //use simple accept/reject for generation
133  while (!finished)
134  {
135  costheta = 2.0*rnd->Rndm()-1.0;
136  //efficiency for the specific value of cos(theta)
137  double eff = 1.0;
138  if (acceptancemodel == 1)
139  eff = 1.0 - 0.7 * costheta.getValV()*costheta.getValV();
140  else
141  eff = 0.3 + 0.7 * costheta.getValV()*costheta.getValV();
142  //use 1/eff as weight to account for acceptance
143  weight = 1.0/eff;
144  //accept/reject
145  if (10.0*rnd->Rndm() < eff*pol.getValV())
146  finished = true;
147  }
148  haccepted->Fill(costheta.getValV());
149  hweighted->Fill(costheta.getValV(), weight.getValV());
150  data.add(RooArgSet(costheta, weight), weight.getValV());
151  }
152 
153  //F i t t o y u s i n g t h e t h r e e d i f f e r e n t a p p r o a c h e s t o u n c e r t a i n t y d e t e r m i n a t i o n
154  //-------------------------------------------------------------------------------------------------------------------------------------------------
155  RooFitResult* result = pol.fitTo(data, Save(true), SumW2Error(false));//this uses the inverse weighted Hessian matrix
156  hc0pull1->Fill((c0.getValV()-c0gen)/c0.getError());
157  hc1pull1->Fill((c1.getValV()-c1gen)/c1.getError());
158 
159  result = pol.fitTo(data, Save(true), SumW2Error(true));//this uses the correction with the Hesse matrix with squared weights
160  hc0pull2->Fill((c0.getValV()-c0gen)/c0.getError());
161  hc1pull2->Fill((c1.getValV()-c1gen)/c1.getError());
162 
163  result = pol.fitTo(data, Save(true), AsymptoticError(true));//this uses the asymptotically correct approach
164  hc0pull3->Fill((c0.getValV()-c0gen)/c0.getError());
165  hc1pull3->Fill((c1.getValV()-c1gen)/c1.getError());
166  }
167 
168  // P l o t o u t p u t d i s t r i b u t i o n s
169  //--------------------------------------------------
170 
171  //plot accepted (weighted) events
172  gStyle->SetOptStat(0);
173  gStyle->SetOptFit(0);
174  TCanvas* cevents = new TCanvas("cevents", "cevents", 800, 600);
175  cevents->cd(1);
176  hweighted->SetMinimum(0.0);
177  hweighted->SetLineColor(2);
178  hweighted->Draw("hist");
179  haccepted->Draw("same hist");
180  TLegend* leg = new TLegend(0.6, 0.8, 0.9, 0.9);
181  leg->AddEntry(haccepted, "Accepted");
182  leg->AddEntry(hweighted, "Weighted");
183  leg->Draw();
184  cevents->Update();
185 
186  //plot pull distributions
187  TCanvas* cpull = new TCanvas("cpull", "cpull", 1200, 800);
188  cpull->Divide(3,2);
189  cpull->cd(1);
190  gStyle->SetOptStat(1100);
191  gStyle->SetOptFit(11);
192  hc0pull1->Fit("gaus");
193  hc0pull1->Draw("ep");
194  cpull->cd(2);
195  hc0pull2->Fit("gaus");
196  hc0pull2->Draw("ep");
197  cpull->cd(3);
198  hc0pull3->Fit("gaus");
199  hc0pull3->Draw("ep");
200  cpull->cd(4);
201  hc1pull1->Fit("gaus");
202  hc1pull1->Draw("ep");
203  cpull->cd(5);
204  hc1pull2->Fit("gaus");
205  hc1pull2->Draw("ep");
206  cpull->cd(6);
207  hc1pull3->Fit("gaus");
208  hc1pull3->Draw("ep");
209  cpull->Update();
210 
211  return 0;
212 }