Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RooNLLVar.cxx
Go to the documentation of this file.
1 /*****************************************************************************
2  * Project: RooFit *
3  * Package: RooFitCore *
4  * @(#)root/roofitcore:$Id$
5  * Authors: *
6  * WV, Wouter Verkerke, UC Santa Barbara, verkerke@slac.stanford.edu *
7  * DK, David Kirkby, UC Irvine, dkirkby@uci.edu *
8  * *
9  * Copyright (c) 2000-2005, Regents of the University of California *
10  * and Stanford University. All rights reserved. *
11  * *
12  * Redistribution and use in source and binary forms, *
13  * with or without modification, are permitted according to the terms *
14  * listed in LICENSE (http://roofit.sourceforge.net/license.txt) *
15  *****************************************************************************/
16 
17 /**
18 \file RooNLLVar.cxx
19 \class RooNLLVar
20 \ingroup Roofitcore
21 
22 Class RooNLLVar implements a -log(likelihood) calculation from a dataset
23 and a PDF. The NLL is calculated as
24 \f[
25  \sum_\mathrm{data} -\log( \mathrmp{pdf}(x_\mathrm{data})
26 \f]
27 In extended mode, a
28 \f$ N_mathrm{expect} - N_mathrm{observed}*log(N_mathrm{expect}) \f$ term is added.
29 **/
30 
31 #include "RooNLLVar.h"
32 
33 #include "RooFit.h"
34 #include "Riostream.h"
35 #include "TMath.h"
36 
37 #include "RooAbsData.h"
38 #include "RooAbsPdf.h"
39 #include "RooCmdConfig.h"
40 #include "RooMsgService.h"
41 #include "RooAbsDataStore.h"
42 #include "RooRealMPFE.h"
43 #include "RooRealSumPdf.h"
44 #include "RooRealVar.h"
45 #include "RooProdPdf.h"
46 #include "RooHelpers.h"
47 
48 #include "Math/Util.h"
49 
50 #include <algorithm>
51 
52 ClassImp(RooNLLVar)
53 
54 RooArgSet RooNLLVar::_emptySet ;
55 
56 
57 ////////////////////////////////////////////////////////////////////////////////
58 /// Construct likelihood from given p.d.f and (binned or unbinned dataset)
59 ///
60 /// Argument | Description
61 /// -------------------------|------------
62 /// Extended() | Include extended term in calculation
63 /// NumCPU() | Activate parallel processing feature
64 /// Range() | Fit only selected region
65 /// SumCoefRange() | Set the range in which to interpret the coefficients of RooAddPdf components
66 /// SplitRange() | Fit range is split by index catory of simultaneous PDF
67 /// ConditionalObservables() | Define conditional observables
68 /// Verbose() | Verbose output of GOF framework classes
69 /// CloneData() | Clone input dataset for internal use (default is kTRUE)
70 /// BatchMode() | Evaluate batches of data events (faster if PDFs support it)
71 
72 RooNLLVar::RooNLLVar(const char *name, const char* title, RooAbsPdf& pdf, RooAbsData& indata,
73  const RooCmdArg& arg1, const RooCmdArg& arg2,const RooCmdArg& arg3,
74  const RooCmdArg& arg4, const RooCmdArg& arg5,const RooCmdArg& arg6,
75  const RooCmdArg& arg7, const RooCmdArg& arg8,const RooCmdArg& arg9) :
76  RooAbsOptTestStatistic(name,title,pdf,indata,
77  *(const RooArgSet*)RooCmdConfig::decodeObjOnTheFly("RooNLLVar::RooNLLVar","ProjectedObservables",0,&_emptySet
78  ,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
79  RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","RangeWithName",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9).c_str(),
80  RooCmdConfig::decodeStringOnTheFly("RooNLLVar::RooNLLVar","AddCoefRange",0,"",arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9).c_str(),
81  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","NumCPU",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
82  RooFit::BulkPartition,
83  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","Verbose",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
84  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","SplitRange",0,0,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9),
85  RooCmdConfig::decodeIntOnTheFly("RooNLLVar::RooNLLVar","CloneData",0,1,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
86 {
87  RooCmdConfig pc("RooNLLVar::RooNLLVar") ;
88  pc.allowUndefined() ;
89  pc.defineInt("extended","Extended",0,kFALSE) ;
90  pc.defineInt("BatchMode", "BatchMode", 0, false);
91 
92  pc.process(arg1) ; pc.process(arg2) ; pc.process(arg3) ;
93  pc.process(arg4) ; pc.process(arg5) ; pc.process(arg6) ;
94  pc.process(arg7) ; pc.process(arg8) ; pc.process(arg9) ;
95 
96  _extended = pc.getInt("extended") ;
97  _batchEvaluations = pc.getInt("BatchMode");
98  _weightSq = kFALSE ;
99  _first = kTRUE ;
100  _offset = 0.;
101  _offsetCarry = 0.;
102  _offsetSaveW2 = 0.;
103  _offsetCarrySaveW2 = 0.;
104 
105  _binnedPdf = 0 ;
106 }
107 
108 
109 
110 ////////////////////////////////////////////////////////////////////////////////
111 /// Construct likelihood from given p.d.f and (binned or unbinned dataset)
112 /// For internal use.
113 
114 RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
115  Bool_t extended, const char* rangeName, const char* addCoefRangeName,
116  Int_t nCPU, RooFit::MPSplit interleave, Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
117  RooAbsOptTestStatistic(name,title,pdf,indata,RooArgSet(),rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
118  _extended(extended),
119  _weightSq(kFALSE),
120  _first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.)
121 {
122  // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
123  // for a binned likelihood calculation
124  _binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
125 
126  // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
127  if (_binnedPdf) {
128 
129  // The Active label will disable pdf integral calculations
130  _binnedPdf->setAttribute("BinnedLikelihoodActive") ;
131 
132  RooArgSet* obs = _funcClone->getObservables(_dataClone) ;
133  if (obs->getSize()!=1) {
134  _binnedPdf = 0 ;
135  } else {
136  RooRealVar* var = (RooRealVar*) obs->first() ;
137  std::list<Double_t>* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
138  std::list<Double_t>::iterator biter = boundaries->begin() ;
139  _binw.resize(boundaries->size()-1) ;
140  Double_t lastBound = (*biter) ;
141  ++biter ;
142  int ibin=0 ;
143  while (biter!=boundaries->end()) {
144  _binw[ibin] = (*biter) - lastBound ;
145  lastBound = (*biter) ;
146  ibin++ ;
147  ++biter ;
148  }
149  }
150  }
151 }
152 
153 
154 
155 ////////////////////////////////////////////////////////////////////////////////
156 /// Construct likelihood from given p.d.f and (binned or unbinned dataset)
157 /// For internal use.
158 
159 RooNLLVar::RooNLLVar(const char *name, const char *title, RooAbsPdf& pdf, RooAbsData& indata,
160  const RooArgSet& projDeps, Bool_t extended, const char* rangeName,const char* addCoefRangeName,
161  Int_t nCPU,RooFit::MPSplit interleave,Bool_t verbose, Bool_t splitRange, Bool_t cloneData, Bool_t binnedL) :
162  RooAbsOptTestStatistic(name,title,pdf,indata,projDeps,rangeName,addCoefRangeName,nCPU,interleave,verbose,splitRange,cloneData),
163  _extended(extended),
164  _weightSq(kFALSE),
165  _first(kTRUE), _offsetSaveW2(0.), _offsetCarrySaveW2(0.)
166 {
167  // If binned likelihood flag is set, pdf is a RooRealSumPdf representing a yield vector
168  // for a binned likelihood calculation
169  _binnedPdf = binnedL ? (RooRealSumPdf*)_funcClone : 0 ;
170 
171  // Retrieve and cache bin widths needed to convert unnormalized binnedPdf values back to yields
172  if (_binnedPdf) {
173 
174  RooArgSet* obs = _funcClone->getObservables(_dataClone) ;
175  if (obs->getSize()!=1) {
176  _binnedPdf = 0 ;
177  } else {
178  RooRealVar* var = (RooRealVar*) obs->first() ;
179  std::list<Double_t>* boundaries = _binnedPdf->binBoundaries(*var,var->getMin(),var->getMax()) ;
180  std::list<Double_t>::iterator biter = boundaries->begin() ;
181  _binw.resize(boundaries->size()-1) ;
182  Double_t lastBound = (*biter) ;
183  ++biter ;
184  int ibin=0 ;
185  while (biter!=boundaries->end()) {
186  _binw[ibin] = (*biter) - lastBound ;
187  lastBound = (*biter) ;
188  ibin++ ;
189  ++biter ;
190  }
191  }
192  }
193 }
194 
195 
196 
197 ////////////////////////////////////////////////////////////////////////////////
198 /// Copy constructor
199 
200 RooNLLVar::RooNLLVar(const RooNLLVar& other, const char* name) :
201  RooAbsOptTestStatistic(other,name),
202  _extended(other._extended),
203  _batchEvaluations(other._batchEvaluations),
204  _weightSq(other._weightSq),
205  _first(kTRUE), _offsetSaveW2(other._offsetSaveW2),
206  _offsetCarrySaveW2(other._offsetCarrySaveW2),
207  _binw(other._binw) {
208  _binnedPdf = other._binnedPdf ? (RooRealSumPdf*)_funcClone : 0 ;
209 }
210 
211 
212 
213 
214 ////////////////////////////////////////////////////////////////////////////////
215 /// Destructor
216 
217 RooNLLVar::~RooNLLVar()
218 {
219 }
220 
221 
222 
223 
224 ////////////////////////////////////////////////////////////////////////////////
225 
226 void RooNLLVar::applyWeightSquared(Bool_t flag)
227 {
228  if (_gofOpMode==Slave) {
229  if (flag != _weightSq) {
230  _weightSq = flag;
231  std::swap(_offset, _offsetSaveW2);
232  std::swap(_offsetCarry, _offsetCarrySaveW2);
233  }
234  setValueDirty();
235  } else if ( _gofOpMode==MPMaster) {
236  for (Int_t i=0 ; i<_nCPU ; i++)
237  _mpfeArray[i]->applyNLLWeightSquared(flag);
238  } else if ( _gofOpMode==SimMaster) {
239  for (Int_t i=0 ; i<_nGof ; i++)
240  ((RooNLLVar*)_gofArray[i])->applyWeightSquared(flag);
241  }
242 }
243 
244 
245 ////////////////////////////////////////////////////////////////////////////////
246 /// Calculate and return likelihood on subset of data.
247 /// \param[in] firstEvent First event to be processed.
248 /// \param[in] lastEvent First event not to be processed, any more.
249 /// \param[in] stepSize Steps between events.
250 /// \note For batch computations, the step size **must** be one.
251 ///
252 /// If this an extended likelihood, the extended term is added to the return likelihood
253 /// in the batch that encounters the event with index 0.
254 
255 Double_t RooNLLVar::evaluatePartition(std::size_t firstEvent, std::size_t lastEvent, std::size_t stepSize) const
256 {
257  // Throughout the calculation, we use Kahan's algorithm for summing to
258  // prevent loss of precision - this is a factor four more expensive than
259  // straight addition, but since evaluating the PDF is usually much more
260  // expensive than that, we tolerate the additional cost...
261  double result(0), carry(0), sumWeight(0);
262 
263  RooAbsPdf* pdfClone = (RooAbsPdf*) _funcClone ;
264 
265  // cout << "RooNLLVar::evaluatePartition(" << GetName() << ") projDeps = " << (_projDeps?*_projDeps:RooArgSet()) << endl ;
266 
267  _dataClone->store()->recalculateCache( _projDeps, firstEvent, lastEvent, stepSize, (_binnedPdf?kFALSE:kTRUE) ) ;
268 
269 
270 
271  // If pdf is marked as binned - do a binned likelihood calculation here (sum of log-Poisson for each bin)
272  if (_binnedPdf) {
273  double sumWeightCarry = 0.;
274  for (auto i=firstEvent ; i<lastEvent ; i+=stepSize) {
275 
276  _dataClone->get(i) ;
277 
278  if (!_dataClone->valid()) continue;
279 
280  Double_t eventWeight = _dataClone->weight();
281 
282 
283  // Calculate log(Poisson(N|mu) for this bin
284  Double_t N = eventWeight ;
285  Double_t mu = _binnedPdf->getVal()*_binw[i] ;
286  //cout << "RooNLLVar::binnedL(" << GetName() << ") N=" << N << " mu = " << mu << endl ;
287 
288  if (mu<=0 && N>0) {
289 
290  // Catch error condition: data present where zero events are predicted
291  logEvalError(Form("Observed %f events in bin %lu with zero event yield",N,(unsigned long)i)) ;
292 
293  } else if (fabs(mu)<1e-10 && fabs(N)<1e-10) {
294 
295  // Special handling of this case since log(Poisson(0,0)=0 but can't be calculated with usual log-formula
296  // since log(mu)=0. No update of result is required since term=0.
297 
298  } else {
299 
300  Double_t term = -1*(-mu + N*log(mu) - TMath::LnGamma(N+1)) ;
301 
302  // TODO replace by Math::KahanSum
303  // Kahan summation of sumWeight
304  Double_t y = eventWeight - sumWeightCarry;
305  Double_t t = sumWeight + y;
306  sumWeightCarry = (t - sumWeight) - y;
307  sumWeight = t;
308 
309  // Kahan summation of result
310  y = term - carry;
311  t = result + y;
312  carry = (t - result) - y;
313  result = t;
314  }
315  }
316 
317 
318  } else { //unbinned PDF
319 
320  if (_batchEvaluations) {
321  std::tie(result, carry, sumWeight) = computeBatched(stepSize, firstEvent, lastEvent);
322 #ifdef ROOFIT_CHECK_CACHED_VALUES
323 
324  double resultScalar, carryScalar, sumWeightScalar;
325  std::tie(resultScalar, carryScalar, sumWeightScalar) =
326  computeScalar(stepSize, firstEvent, lastEvent);
327 
328  constexpr bool alwaysPrint = false;
329 
330  if (alwaysPrint || fabs(result - resultScalar)/resultScalar > 1.E-15) {
331  std::cerr << "RooNLLVar: result is off\n\t" << std::setprecision(15) << result
332  << "\n\t" << resultScalar << std::endl;
333  }
334 
335  if (alwaysPrint || fabs(carry - carryScalar)/carryScalar > 10.) {
336  std::cerr << "RooNLLVar: carry is far off\n\t" << std::setprecision(15) << carry
337  << "\n\t" << carryScalar << std::endl;
338  }
339 
340  if (alwaysPrint || fabs(sumWeight - sumWeightScalar)/sumWeightScalar > 1.E-15) {
341  std::cerr << "RooNLLVar: sumWeight is off\n\t" << std::setprecision(15) << sumWeight
342  << "\n\t" << sumWeightScalar << std::endl;
343  }
344 
345 #endif
346  } else { //scalar mode
347  std::tie(result, carry, sumWeight) = computeScalar(stepSize, firstEvent, lastEvent);
348  }
349 
350  // include the extended maximum likelihood term, if requested
351  if(_extended && _setNum==_extSet) {
352  if (_weightSq) {
353 
354  // TODO Batch this up
355  // Calculate sum of weights-squared here for extended term
356  Double_t sumW2(0), sumW2carry(0);
357  for (decltype(_dataClone->numEntries()) i = 0; i < _dataClone->numEntries() ; i++) {
358  _dataClone->get(i);
359  Double_t y = _dataClone->weightSquared() - sumW2carry;
360  Double_t t = sumW2 + y;
361  sumW2carry = (t - sumW2) - y;
362  sumW2 = t;
363  }
364 
365  Double_t expected= pdfClone->expectedEvents(_dataClone->get());
366 
367  // Adjust calculation of extended term with W^2 weighting: adjust poisson such that
368  // estimate of Nexpected stays at the same value, but has a different variance, rescale
369  // both the observed and expected count of the Poisson with a factor sum[w] / sum[w^2] which is
370  // the effective weight of the Poisson term.
371  // i.e. change Poisson(Nobs = sum[w]| Nexp ) --> Poisson( sum[w] * sum[w] / sum[w^2] | Nexp * sum[w] / sum[w^2] )
372  // weighted by the effective weight sum[w^2]/ sum[w] in the likelihood.
373  // Since here we compute the likelihood with the weight square we need to multiply by the
374  // square of the effective weight
375  // expectedW = expected * sum[w] / sum[w^2] : effective expected entries
376  // observedW = sum[w] * sum[w] / sum[w^2] : effective observed entries
377  // The extended term for the likelihood weighted by the square of the weight will be then:
378  // (sum[w^2]/ sum[w] )^2 * expectedW - (sum[w^2]/ sum[w] )^2 * observedW * log (expectedW) and this is
379  // using the previous expressions for expectedW and observedW
380  // sum[w^2] / sum[w] * expected - sum[w^2] * log (expectedW)
381  // and since the weights are constants in the likelihood we can use log(expected) instead of log(expectedW)
382 
383  Double_t expectedW2 = expected * sumW2 / _dataClone->sumEntries() ;
384  Double_t extra= expectedW2 - sumW2*log(expected );
385 
386  // Double_t y = pdfClone->extendedTerm(sumW2, _dataClone->get()) - carry;
387 
388  Double_t y = extra - carry ;
389 
390  Double_t t = result + y;
391  carry = (t - result) - y;
392  result = t;
393  } else {
394  Double_t y = pdfClone->extendedTerm(_dataClone->sumEntries(), _dataClone->get()) - carry;
395  Double_t t = result + y;
396  carry = (t - result) - y;
397  result = t;
398  }
399  }
400  } //unbinned PDF
401 
402 
403  // If part of simultaneous PDF normalize probability over
404  // number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
405  if (_simCount>1) {
406  Double_t y = sumWeight*log(1.0*_simCount) - carry;
407  Double_t t = result + y;
408  carry = (t - result) - y;
409  result = t;
410  }
411 
412 
413  // At the end of the first full calculation, wire the caches
414  if (_first) {
415  _first = kFALSE ;
416  _funcClone->wireAllCaches() ;
417  }
418 
419 
420  // Check if value offset flag is set.
421  if (_doOffset) {
422 
423  // If no offset is stored enable this feature now
424  if (_offset==0 && result !=0 ) {
425  coutI(Minimization) << "RooNLLVar::evaluatePartition(" << GetName() << ") first = "<< firstEvent << " last = " << lastEvent << " Likelihood offset now set to " << result << std::endl ;
426  _offset = result ;
427  _offsetCarry = carry;
428  }
429 
430  // Subtract offset
431  Double_t y = -_offset - (carry + _offsetCarry);
432  Double_t t = result + y;
433  carry = (t - result) - y;
434  result = t;
435  }
436 
437 
438  _evalCarry = carry;
439  return result ;
440 }
441 
442 
443 std::tuple<double, double, double> RooNLLVar::computeBatched(std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent) const
444 {
445  if (stepSize != 1) {
446  throw std::invalid_argument(std::string("Error in ") + __FILE__ + ": Step size for batch computations can only be 1.");
447  }
448 
449  auto pdfClone = static_cast<const RooAbsPdf*>(_funcClone);
450 
451  auto results = pdfClone->getLogValBatch(firstEvent, lastEvent-firstEvent, _normSet);
452 
453 
454 #ifdef ROOFIT_CHECK_CACHED_VALUES
455  for (std::size_t evtNo = firstEvent; evtNo < lastEvent; ++evtNo) {
456  _dataClone->get(evtNo);
457  assert(_dataClone->valid());
458  pdfClone->getValV(_normSet);
459  try {
460  RooHelpers::BatchInterfaceAccessor::checkBatchComputation(*pdfClone, evtNo, _normSet);
461  } catch (std::exception& e) {
462  std::cerr << "ERROR when checking batch computation for event " << evtNo << ":\n"
463  << e.what() << std::endl;
464  }
465  }
466 #endif
467 
468 
469  // Compute sum of event weights. First check if we need squared weights
470  const RooSpan<const double> eventWeights = _dataClone->getWeightBatch(firstEvent, lastEvent);
471  //Make it obvious for the optimiser that the switch will never change while looping
472  const bool retrieveSquaredWeights = _weightSq;
473  auto retrieveWeight = [&eventWeights, retrieveSquaredWeights](std::size_t i) {
474  if (retrieveSquaredWeights)
475  return eventWeights[i] * eventWeights[i];
476  else
477  return eventWeights[i];
478  };
479 
480  //Sum the event weights
481  ROOT::Math::KahanSum<double, 4u> kahanWeight;
482  if (eventWeights.size() == 1) {
483  kahanWeight.Add( (lastEvent - firstEvent) * retrieveWeight(0));
484  } else {
485  for (std::size_t i = 0; i < eventWeights.size(); ++i) {
486  kahanWeight.AddIndexed(retrieveWeight(i), i);
487  }
488  }
489 
490 
491  //Sum the probabilities
492  ROOT::Math::KahanSum<double, 4u> kahanProb;
493  if (eventWeights.size() == 1) {
494  const double weight = retrieveWeight(0);
495  for (std::size_t i = 0; i < results.size(); ++i) {
496  kahanProb.AddIndexed(-weight * results[i], i);
497  }
498  } else {
499  for (std::size_t i = 0; i < results.size(); ++i) {
500  kahanProb.AddIndexed(-retrieveWeight(i) * results[i], i);
501  }
502  }
503 
504 
505  return std::tuple<double, double, double>{kahanProb.Sum(), kahanProb.Carry(), kahanWeight.Sum()};
506 }
507 
508 
509 std::tuple<double, double, double> RooNLLVar::computeScalar(std::size_t stepSize, std::size_t firstEvent, std::size_t lastEvent) const {
510  auto pdfClone = static_cast<const RooAbsPdf*>(_funcClone);
511 
512  ROOT::Math::KahanSum<double> kahanWeight;
513  ROOT::Math::KahanSum<double> kahanProb;
514 
515  for (auto i=firstEvent; i<lastEvent; i+=stepSize) {
516  _dataClone->get(i) ;
517 
518  if (!_dataClone->valid()) continue;
519 
520  Double_t eventWeight = _dataClone->weight(); //FIXME
521  if (0. == eventWeight * eventWeight) continue ;
522  if (_weightSq) eventWeight = _dataClone->weightSquared() ;
523 
524  const double term = -eventWeight * pdfClone->getLogVal(_normSet);
525 
526  kahanWeight.Add(eventWeight);
527  kahanProb.Add(term);
528  }
529 
530  return std::tuple<double, double, double>{kahanProb.Sum(), kahanProb.Carry(), kahanWeight.Sum()};
531 }