Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RLoopManager.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RLOOPMANAGER
12 #define ROOT_RLOOPMANAGER
13 
14 #include "ROOT/RDF/RNodeBase.hxx"
15 #include "ROOT/RDF/NodesUtils.hxx"
16 
#include <algorithm>
17 #include <functional>
18 #include <map>
19 #include <memory>
20 #include <string>
21 #include <vector>
22 
// forward declarations
// TTree and TDirectory are referred to by name further down (function parameters, a shared_ptr member and
// return types), so they are declared here instead of relying on an included header to declare them.
class TDirectory;
class TTree;
class TTreeReader;
25 
26 namespace ROOT {
// Forward declarations only: in this header RCutFlowReport is taken by reference and RDataSource is held
// through a unique_ptr / returned as a raw pointer, so their full definitions are not needed here.
27 namespace RDF {
28 class RCutFlowReport;
29 class RDataSource;
30 } // ns RDF
31 
32 namespace Internal {
33 namespace RDF {
/// Declaration only; the definition is not part of this header.
/// \param[in] t The tree passed by non-const reference.
/// \param[in] allowDuplicates Defaults to true.
/// NOTE(review): presumably controls whether repeated branch names are kept in the returned list --
/// confirm against the definition.
34 ColumnNames_t GetBranchNames(TTree &t, bool allowDuplicates = true);
35 
// Forward declarations. RActionBase is used via raw pointers and GraphNode via shared_ptr in RLoopManager.
36 class RActionBase;
37 class GraphNode;
38 
39 namespace GraphDrawing {
// Forward declaration only; not referenced by any other declaration visible in this header.
40 class GraphCreatorHelper;
41 } // ns GraphDrawing
42 } // ns RDF
43 } // ns Internal
44 
45 namespace Detail {
46 namespace RDF {
// NOTE(review): using-directive at namespace scope inside a header. Every includer gets all names of
// ROOT::TypeTraits visible to unqualified lookup within ROOT::Detail::RDF. Other code may rely on this,
// so confirm before removing it.
47 using namespace ROOT::TypeTraits;
// Shorthand for ROOT::Internal::RDF, used in the declarations that follow.
48 namespace RDFInternal = ROOT::Internal::RDF;
49 
// Forward declarations: this header only stores and passes raw pointers to these types.
50 class RCustomColumnBase;
51 class RFilterBase;
52 class RRangeBase;
53 
54 /// The head node of a RDF computation graph.
55 /// This class is responsible of running the event loop.
56 class RLoopManager : public RNodeBase {
57  using RDataSource = ROOT::RDF::RDataSource;
58  enum class ELoopType { kROOTFiles, kROOTFilesMT, kNoFiles, kNoFilesMT, kDataSource, kDataSourceMT };
59  using Callback_t = std::function<void(unsigned int)>;
60  class TCallback {
61  const Callback_t fFun;
62  const ULong64_t fEveryN;
63  std::vector<ULong64_t> fCounters;
64 
65  public:
66  TCallback(ULong64_t everyN, Callback_t &&f, unsigned int nSlots)
67  : fFun(std::move(f)), fEveryN(everyN), fCounters(nSlots, 0ull)
68  {
69  }
70 
71  void operator()(unsigned int slot)
72  {
73  auto &c = fCounters[slot];
74  ++c;
75  if (c == fEveryN) {
76  c = 0ull;
77  fFun(slot);
78  }
79  }
80  };
81 
82  class TOneTimeCallback {
83  const Callback_t fFun;
84  std::vector<int> fHasBeenCalled; // std::vector<bool> is thread-unsafe for our purposes (and generally evil)
85 
86  public:
87  TOneTimeCallback(Callback_t &&f, unsigned int nSlots) : fFun(std::move(f)), fHasBeenCalled(nSlots, 0) {}
88 
89  void operator()(unsigned int slot)
90  {
91  if (fHasBeenCalled[slot] == 1)
92  return;
93  fFun(slot);
94  fHasBeenCalled[slot] = 1;
95  }
96  };
97 
98  std::vector<RDFInternal::RActionBase *> fBookedActions; ///< Non-owning pointers to actions to be run
99  std::vector<RDFInternal::RActionBase *> fRunActions; ///< Non-owning pointers to actions already run
100  std::vector<RFilterBase *> fBookedFilters;
101  std::vector<RFilterBase *> fBookedNamedFilters; ///< Contains a subset of fBookedFilters, i.e. only the named filters
102  std::vector<RRangeBase *> fBookedRanges;
103 
104  /// Shared pointer to the input TTree. It does not delete the pointee if the TTree/TChain was passed directly as an
105  /// argument to RDataFrame's ctor (in which case we let users retain ownership).
106  std::shared_ptr<TTree> fTree{nullptr};
107  const ColumnNames_t fDefaultColumns;
108  const ULong64_t fNEmptyEntries{0};
109  const unsigned int fNSlots{1};
110  bool fMustRunNamedFilters{true};
111  const ELoopType fLoopType; ///< The kind of event loop that is going to be run (e.g. on ROOT files, on no files)
112  std::string fToJitDeclare; ///< Code that should be just-in-time declared right before the event loop
113  std::string fToJitExec; ///< Code that should be just-in-time executed right before the event loop
114  const std::unique_ptr<RDataSource> fDataSource; ///< Owning pointer to a data-source object. Null if no data-source
115  std::map<std::string, std::string> fAliasColumnNameMap; ///< ColumnNameAlias-columnName pairs
116  std::vector<TCallback> fCallbacks; ///< Registered callbacks
117  std::vector<TOneTimeCallback> fCallbacksOnce; ///< Registered callbacks to invoke just once before running the loop
118  /// A unique ID that identifies the computation graph that starts with this RLoopManager.
119  /// Used, for example, to jit objects in a namespace reserved for this computation graph
120  const unsigned int fID = GetNextID();
121 
122  std::vector<RCustomColumnBase *> fCustomColumns; ///< Non-owning container of all custom columns created so far.
123  /// Cache of the tree/chain branch names. Never access directy, always use GetBranchNames().
124  ColumnNames_t fValidBranchNames;
125 
126  void CheckIndexedFriends();
127  void RunEmptySourceMT();
128  void RunEmptySource();
129  void RunTreeProcessorMT();
130  void RunTreeReader();
131  void RunDataSourceMT();
132  void RunDataSource();
133  void RunAndCheckFilters(unsigned int slot, Long64_t entry);
134  void InitNodeSlots(TTreeReader *r, unsigned int slot);
135  void InitNodes();
136  void CleanUpNodes();
137  void CleanUpTask(unsigned int slot);
138  void EvalChildrenCounts();
139  static unsigned int GetNextID();
140 
141 public:
142  RLoopManager(TTree *tree, const ColumnNames_t &defaultBranches);
143  RLoopManager(ULong64_t nEmptyEntries);
144  RLoopManager(std::unique_ptr<RDataSource> ds, const ColumnNames_t &defaultBranches);
145  RLoopManager(const RLoopManager &) = delete;
146  RLoopManager &operator=(const RLoopManager &) = delete;
147 
148  void JitDeclarations();
149  void Jit();
150  RLoopManager *GetLoopManagerUnchecked() final { return this; }
151  void Run();
152  const ColumnNames_t &GetDefaultColumnNames() const;
153  TTree *GetTree() const;
154  ::TDirectory *GetDirectory() const;
155  ULong64_t GetNEmptyEntries() const { return fNEmptyEntries; }
156  RDataSource *GetDataSource() const { return fDataSource.get(); }
157  void Book(RDFInternal::RActionBase *actionPtr);
158  void Deregister(RDFInternal::RActionBase *actionPtr);
159  void Book(RFilterBase *filterPtr);
160  void Deregister(RFilterBase *filterPtr);
161  void Book(RRangeBase *rangePtr);
162  void Deregister(RRangeBase *rangePtr);
163  bool CheckFilters(unsigned int, Long64_t) final;
164  unsigned int GetNSlots() const { return fNSlots; }
165  void Report(ROOT::RDF::RCutFlowReport &rep) const final;
166  /// End of recursive chain of calls, does nothing
167  void PartialReport(ROOT::RDF::RCutFlowReport &) const final {}
168  void SetTree(const std::shared_ptr<TTree> &tree) { fTree = tree; }
169  void IncrChildrenCount() final { ++fNChildren; }
170  void StopProcessing() final { ++fNStopsReceived; }
171  void ToJitDeclare(const std::string &s) { fToJitDeclare.append(s); }
172  void ToJitExec(const std::string &s) { fToJitExec.append(s); }
173  void AddColumnAlias(const std::string &alias, const std::string &colName) { fAliasColumnNameMap[alias] = colName; }
174  const std::map<std::string, std::string> &GetAliasMap() const { return fAliasColumnNameMap; }
175  void RegisterCallback(ULong64_t everyNEvents, std::function<void(unsigned int)> &&f);
176  unsigned int GetID() const { return fID; }
177 
178  /// End of recursive chain of calls, does nothing
179  void AddFilterName(std::vector<std::string> &) {}
180  /// For each booked filter, returns either the name or "Unnamed Filter"
181  std::vector<std::string> GetFiltersNames();
182 
183  /// For all the actions, either booked or run
184  std::vector<RDFInternal::RActionBase *> GetAllActions();
185 
186  void RegisterCustomColumn(RCustomColumnBase *column) { fCustomColumns.push_back(column); }
187 
188  void DeRegisterCustomColumn(RCustomColumnBase *column)
189  {
190  fCustomColumns.erase(std::remove(fCustomColumns.begin(), fCustomColumns.end(), column), fCustomColumns.end());
191  }
192 
193  std::vector<RDFInternal::RActionBase *> GetBookedActions() { return fBookedActions; }
194  std::shared_ptr<ROOT::Internal::RDF::GraphDrawing::GraphNode> GetGraph();
195 
196  const ColumnNames_t &GetBranchNames();
197 };
198 
199 } // ns RDF
200 } // ns Detail
201 } // ns ROOT
202 
203 #endif