Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
TSimpleAnalysis.cxx
Go to the documentation of this file.
1 // @(#)root/treeplayer:$Id$
2 // Author: Luca Giommi 22/08/16
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2016, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #include "TSimpleAnalysis.h"
13 
14 #include "TFile.h"
15 #include "TChain.h"
16 #include "TChainElement.h"
17 #include "TH1.h"
18 #include "TError.h"
19 #include "TKey.h"
20 #ifdef R__USE_IMT
21 #include "ROOT/TThreadExecutor.hxx"
22 #endif
23 #include "TROOT.h"
24 
25 #include <string>
26 #include <fstream>
27 #include <vector>
28 #include <map>
29 #include <iostream>
30 
31 /** \class TSimpleAnalysis
32 
33 A TSimpleAnalysis object creates histograms from a TChain. These histograms
34 are stored to an output file. The histogrammed (TTreeFormula) expressions,
35 their cuts, the input and output files are configured through a simple config
36 file that allows comments starting with '#'.
37 Here an example of configuration file:
38 
39 ~~~ {.cpp}
40 # This is an example of configuration file
41 file_output.root #the output file in which histograms are stored
42 
43 # The next line has the name of the tree of the input data. It is
44 # optional if there is exactly one tree in the first input file.
45 ntuple #name of the input tree
46 
47 # The lines of the next block correspond to .root input files that
48 # contain the tree
49 hsimple1.root #first .root input file
50 hsimple2.root #second .root input file
51 
52 # The next block is composed by lines that allow to configure the
53 # histograms. They have the following syntax:
54 # NAME = EXPRESSION if CUT
55 # which corresponds to chain->Draw("EXPRESSION >> NAME", "CUT")
56 # i.e. it will create a histogram called NAME and store it in
57 # file_output.root.
58 # "if CUT" is optional
59 hpx=px if px<-3 #first histogram
60 hpxpy=px:py #second histogram
61 
62 # End of the configuration file
63 ~~~
64 
65 It is possible to use the script rootdrawtree that allows to use the class
66 just in command line through the bash shell.
67 */
68 
69 ////////////////////////////////////////////////////////////////////////////////
70 /// Delete comments, leading and trailing white spaces in a string.
71 ///
72 /// param[in] line - line read from the input file
73 
74 static void DeleteCommentsAndSpaces(std::string& line)
75 {
76  // Delete comments
77  std::size_t comment = line.find('#');
78  line = line.substr(0, comment);
79  // Delete leading spaces
80  std::size_t firstNotSpace = line.find_first_not_of(" \t");
81  if (firstNotSpace != std::string::npos)
82  line = line.substr(firstNotSpace);
83  else {
84  line.clear();
85  return;
86  }
87  // Delete trailing spaces
88  std::size_t lastNotSpace = line.find_last_not_of(" \t");
89  if (lastNotSpace != std::string::npos)
90  line = line.substr(0, lastNotSpace + 1);
91 }
92 
93 ////////////////////////////////////////////////////////////////////////////////
94 /// Handle the expression lines of the input file in order to pass the
95 /// elements to the members of the object.
96 ///
97 /// param[in] line - TTreeFormula expression, either read form the configuration
98 /// file or passed as expression to the constructor
99 
100 std::string TSimpleAnalysis::HandleExpressionConfig(const std::string& line)
101 {
102  static const std::string kCutIntr = " if ";
103 
104  std::size_t equal = line.find("=");
105  if (equal == std::string::npos)
106  return "Error: missing '='";
107 
108  // Set the histName value
109  std::string histName = line.substr(0, equal);
110  DeleteCommentsAndSpaces(histName);
111  if (histName.empty())
112  return "Error: no histName found";
113 
114  //Set the histExpression value
115  std::size_t cutPos = line.find(kCutIntr, equal);
116  std::string histExpression;
117  if (cutPos == std::string::npos)
118  histExpression = line.substr(equal + 1);
119  else
120  histExpression = line.substr(equal + 1, cutPos - equal - 1);
121  DeleteCommentsAndSpaces(histExpression);
122  if (histExpression.empty())
123  return "Error: no expression found";
124 
125  // Set the histCut value
126  std::string histCut;
127  if (cutPos != std::string::npos) {
128  histCut = line.substr(cutPos + kCutIntr.size());
129  DeleteCommentsAndSpaces(histCut);
130  if (histCut.empty())
131  return "Error: missing cut expression after 'if'";
132  }
133  else
134  histCut = "";
135 
136  // Set the map that contains the histName, histExpressions and histCut values
137  auto check = fHists.insert(std::make_pair((const std::string&)histName,
138  std::make_pair(histExpression, histCut)));
139 
140  // Check if there are histograms with the same name
141  if (!check.second)
142  return "Duplicate histogram name";
143  return "";
144 }
145 
146 ////////////////////////////////////////////////////////////////////////////////
147 /// Constructor for the case of command line parsing arguments. It sets the members
148 /// of the object.
149 ///
150 /// \param[in] output - name of the output file
151 /// \param[in] inputFiles - name of the input .root files
152 /// \param[in] expressions - what is shown in the histograms
153 /// \param[in] treeName - name of the tree
154 /// \throws std::runtime_error in case of ill-formed expressions
155 
156 TSimpleAnalysis::TSimpleAnalysis(const std::string& output,
157  const std::vector<std::string>& inputFiles,
158  const std::vector<std::string>& expressions,
159  const std::string& treeName = ""):
160  fInputFiles(inputFiles), fOutputFile(output), fTreeName(treeName)
161 {
162  for (const std::string& expr: expressions) {
163  std::string errMessage = HandleExpressionConfig(expr);
164  if (!errMessage.empty())
165  throw std::runtime_error(errMessage + " in " + expr);
166  }
167 }
168 
169 ////////////////////////////////////////////////////////////////////////////////
170 /// Extract the name of the tree from the first input file when the tree name
171 /// isn't in the configuration file. Returns the name of the tree.
172 
173 static std::string ExtractTreeName(std::string& firstInputFile)
174 {
175  std::string treeName = "";
176  std::unique_ptr<TFile> inputFile{TFile::Open(firstInputFile.c_str())};
177 
178  // Loop over all the keys inside the first input file
179  for (TObject* keyAsObj : *inputFile->GetListOfKeys()) {
180  TKey* key = static_cast<TKey*>(keyAsObj);
181  TClass* clObj = TClass::GetClass(key->GetClassName());
182  if (!clObj)
183  continue;
184  // If the key is related to and object that inherits from TTree::Class we
185  // set treeName with the name of this key if treeName is empty, otherwise
186  // error occurs
187  if (clObj->InheritsFrom(TTree::Class())) {
188  if (treeName.empty())
189  treeName = key->GetName();
190  else {
191  ::Error("TSimpleAnalysis::Analyze", "Multiple trees inside %s", firstInputFile.c_str());
192  return "";
193  }
194  }
195  }
196  // If treeName is yet empty, error occurs
197  if (treeName.empty()) {
198  ::Error("TSimpleAnalysis::Analyze", "No tree inside %s", firstInputFile.c_str());
199  return "";
200  }
201  return treeName;
202 }
203 
204 ////////////////////////////////////////////////////////////////////////////////
205 /// Returns true if there are no errors in TChain::LoadTree()
206 
207 static bool CheckChainLoadResult(TChain* chain)
208 {
209  // Possible return values of TChain::LoadTree()
210  static const char* errors[] {
211  "all good", // 0
212  "empty chain", // -1
213  "invalid entry number", // -2
214  "cannot open the file", // -3
215  "missing tree", // -4
216  "internal error" // -5
217  };
218 
219  bool ret = true;
220  TObjArray *fileElements = chain->GetListOfFiles();
221  TIter next(fileElements);
222  while (TChainElement* chEl = (TChainElement*)next()) {
223  if (chEl->GetLoadResult() < 0) {
224  ::Error("TSimpleAnalysis::Run", "Load failure in file %s: %s",
225  chEl->GetTitle(), errors[-(chEl->GetLoadResult())]);
226  ret = false;
227  }
228  }
229  return ret;
230 }
231 
232 ////////////////////////////////////////////////////////////////////////////////
233 /// Disambiguate tree name from first input file and set up fTreeName if it is
234 /// empty
235 
236 bool TSimpleAnalysis::SetTreeName()
237 {
238  // Disambiguate tree name from first input file:
239  // just try to open it, if that works it's an input file.
240  if (!fTreeName.empty()) {
241  // Silence possible error message from TFile constructor if this is a tree name.
242  int oldLevel = gErrorIgnoreLevel;
243  gErrorIgnoreLevel = kFatal;
244  if (TFile* probe = TFile::Open(fTreeName.c_str())) {
245  if (!probe->IsZombie()) {
246  fInputFiles.insert(fInputFiles.begin(), fTreeName);
247  fTreeName.clear();
248  }
249  delete probe;
250  }
251  gErrorIgnoreLevel = oldLevel;
252  }
253  // If fTreeName is empty we try to find the name of the tree through reading
254  // of the first input file
255  if (fTreeName.empty())
256  fTreeName = ExtractTreeName(fInputFiles[0]);
257  if (fTreeName.empty()) // No tree name found
258  return false;
259  return true;
260 }
261 
262 ////////////////////////////////////////////////////////////////////////////////
263 /// Execute all the TChain::Draw() as configured and stores the output histograms.
264 /// Returns true if the analysis succeeds.
265 
266 bool TSimpleAnalysis::Run()
267 {
268  if (!SetTreeName())
269  return false;
270 
271  // Create the output file and check if it fails
272  TFile ofile(fOutputFile.c_str(), "RECREATE");
273  if (ofile.IsZombie()) {
274  ::Error("TSimpleAnalysis::Run", "Impossible to create %s", fOutputFile.c_str());
275  return false;
276  }
277 
278  // Store the histograms into a vector
279  auto generateHisto = [&](const std::pair<TChain*, TDirectory*>& job) {
280  TChain* chain = job.first;
281  TDirectory* taskDir = job.second;
282  taskDir->cd();
283  std::vector<TH1F *> vPtrHisto(fHists.size());
284  // Index for a correct set up of vPtrHisto
285  int i = 0;
286 
287  // Loop over all the histograms
288  for (const auto &histo : fHists) {
289  const std::string& expr = histo.second.first;
290  const std::string& histoName = histo.first;
291  const std::string& cut = histo.second.second;
292 
293  chain->Draw((expr + ">>" + histoName).c_str(), cut.c_str(), "goff");
294  TH1F *ptrHisto = (TH1F*)taskDir->Get(histoName.c_str());
295 
296  // Check if there are errors inside the chain
297  if (!CheckChainLoadResult(chain))
298  return std::vector<TH1F *>();
299 
300  vPtrHisto[i] = ptrHisto;
301  ++i;
302  }
303  return vPtrHisto;
304  };
305 
306 #if 0
307  // The MT version is currently disabled because reading emulated objects
308  // triggers a lock for every object read. This in turn increases the run
309  // time way beyond the serial case.
310 
311 
312  ROOT::EnableThreadSafety();
313  ROOT::TThreadExecutor pool(8);
314 
315  // Do the chain of the fInputFiles
316  std::vector<std::pair<TChain*, TDirectory*>> vChains;
317  for (size_t i = 0; i < fInputFiles.size(); ++i){
318  const std::string& inputfile = fInputFiles[i];
319  TChain *ch;
320  ch = new TChain(fTreeName.c_str());
321  ch->Add(inputfile.c_str());
322 
323  // Create task-specific TDirectory, so avoid parallel tasks to interfere
324  // in gDirectory with histogram registration.
325  TDirectory* taskDir = gROOT->mkdir(TString::Format("TSimpleAnalysis_taskDir_%d", (int)i));
326 
327  vChains.emplace_back(std::make_pair(ch, taskDir));
328  }
329 
330  auto vFileswHists = pool.Map(generateHisto, vChains);
331 
332  // If a file does not exist, one of the vFileswHists
333  // will be a vector of length 0. Detect that.
334  for (auto&& histsOfJob: vFileswHists) {
335  if (histsOfJob.empty())
336  return false;
337  }
338 
339  // Merge the results. Initialize the result with the first task's results,
340  // then add the other tasks.
341  std::vector<TH1F *> vPtrHisto{vFileswHists[0]};
342  ofile.cd();
343  for (unsigned j = 0; j < fHists.size(); j++) {
344  for (unsigned i = 1; i < vFileswHists.size(); i++) {
345  if (!vFileswHists[i][j]) {
346  // ignore that sum histogram:
347  delete vPtrHisto[j];
348  vPtrHisto[j] = nullptr;
349  continue;
350  }
351  if (vPtrHisto[j])
352  vPtrHisto[j]->Add(vFileswHists[i][j]);
353  }
354  if (vPtrHisto[j])
355  vPtrHisto[j]->Write();
356  }
357  return true;
358 
359 #else
360 
361  // Do the chain of the fInputFiles
362  TChain* chain = new TChain(fTreeName.c_str());
363  for (const std::string& inputfile: fInputFiles)
364  chain->Add(inputfile.c_str());
365 
366  // Generate histograms
367  auto vHisto = generateHisto({chain, gDirectory});
368  if (vHisto.empty())
369  return false;
370  ofile.cd();
371  // Store the histograms
372  for (auto histo: vHisto) {
373  if (histo)
374  histo->Write();
375  }
376  return true;
377 
378 #endif
379 }
380 
381 ////////////////////////////////////////////////////////////////////////////////
382 /// Returns false if not a tree name, otherwise sets the name of the tree.
383 ///
384 /// param[in] line - line read from the input file
385 
386 bool TSimpleAnalysis::HandleInputFileNameConfig(const std::string& line)
387 {
388  if (line.find("=") == std::string::npos) {
389  fInputFiles.push_back(line);
390  return true;
391  }
392  return false; // It's an expression
393 }
394 
395 ////////////////////////////////////////////////////////////////////////////////
396 /// Skip subsequent empty lines read from fIn and returns the next not empty line.
397 ///
398 /// param[in] numbLine - number of the input file line
399 
400 std::string TSimpleAnalysis::GetLine(int& numbLine)
401 {
402  std::string notEmptyLine;
403 
404  do {
405  getline(fIn, notEmptyLine);
406  DeleteCommentsAndSpaces(notEmptyLine);
407  numbLine++;
408  } while (fIn && notEmptyLine.empty());
409 
410  return notEmptyLine;
411 }
412 
413 ////////////////////////////////////////////////////////////////////////////////
414 /// This function has the aim of setting the arguments read from the input file.
415 
416 bool TSimpleAnalysis::Configure()
417 {
418  int readingSection = kReadingOutput;
419  std::string line;
420  int numbLine = 0;
421 
422  // Error if the input file does not exist
423  fIn.open(fConfigFile);
424  if (!fIn) {
425  ::Error("TSimpleAnalysis::Configure", "File %s not found", fConfigFile.c_str());
426  return false;
427  }
428 
429  while (!fIn.eof()) {
430  line = GetLine(numbLine);
431  if (line.empty()) // It can happen if fIn.eof()
432  continue;
433  std::string errMessage;
434 
435  switch (readingSection) {
436 
437  // Set the name of the output file
438  case kReadingOutput:
439  fOutputFile = line;
440  readingSection++;
441  break;
442 
443  // Set the name of the tree
444  case kReadingTreeName:
445  fTreeName = line;
446  readingSection++;
447  break;
448 
449  // Set the input files
450  case kReadingInput:
451  if (!HandleInputFileNameConfig(line)) {
452  // Not an input file name; try to parse as an expression
453  errMessage = HandleExpressionConfig(line);
454  readingSection = kReadingExpressions;
455  }
456  break;
457 
458  // Set the expressions
459  case kReadingExpressions:
460  errMessage = HandleExpressionConfig(line);
461  break;
462  }
463 
464  // Report any errors if occur during the configuration proceedings
465  if (!errMessage.empty()) {
466  ::Error("TSimpleAnalysis::Configure", "%s in %s:%d", errMessage.c_str(),
467  fConfigFile.c_str(), numbLine);
468  return false;
469  }
470  } // while (!fIn.eof())
471  return true;
472 }
473 
474 ////////////////////////////////////////////////////////////////////////////////
475 /// Function that allows to create the TSimpleAnalysis object and execute its
476 /// Configure and Analyze functions.
477 ///
478 /// param[in] configurationFile - name of the input file used to create the TSimpleAnalysis object
479 
480 bool RunSimpleAnalysis (const char* configurationFile) {
481  TSimpleAnalysis obj(configurationFile);
482  if (!obj.Configure())
483  return false;
484  if (!obj.Run())
485  return false;
486  return true; // Return true only if Configure() and Run() functions were performed correctly
487 }