Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
ActionHelpers.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 12/2016
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RDFOPERATIONS
12 #define ROOT_RDFOPERATIONS
13 
14 #include "Compression.h"
16 #include "ROOT/RStringView.hxx"
17 #include "ROOT/RVec.hxx"
18 #include "ROOT/TBufferMerger.hxx" // for SnapshotHelper
20 #include "ROOT/RDF/Utils.hxx"
21 #include "ROOT/RMakeUnique.hxx"
23 #include "ROOT/TypeTraits.hxx"
24 #include "ROOT/RDF/RDisplay.hxx"
25 #include "RtypesCore.h"
26 #include "TBranch.h"
27 #include "TClassEdit.h"
28 #include "TDirectory.h"
29 #include "TFile.h" // for SnapshotHelper
30 #include "TH1.h"
31 #include "TGraph.h"
32 #include "TLeaf.h"
33 #include "TObjArray.h"
34 #include "TObject.h"
35 #include "TTree.h"
36 #include "TTreeReader.h" // for SnapshotHelper
37 
38 #include <algorithm>
39 #include <limits>
40 #include <memory>
41 #include <stdexcept>
42 #include <string>
43 #include <type_traits>
44 #include <vector>
45 #include <iomanip>
46 
47 /// \cond HIDDEN_SYMBOLS
48 
49 namespace ROOT {
50 namespace Detail {
51 namespace RDF {
52 template <typename Helper>
53 class RActionImpl {
54 public:
55  // call Helper::FinalizeTask if present, do nothing otherwise
56  template <typename T = Helper>
57  auto CallFinalizeTask(unsigned int slot) -> decltype(&T::FinalizeTask, void())
58  {
59  static_cast<Helper *>(this)->FinalizeTask(slot);
60  }
61 
62  template <typename... Args>
63  void CallFinalizeTask(unsigned int, Args...) {}
64 
65 };
66 
67 } // namespace RDF
68 } // namespace Detail
69 
70 namespace Internal {
71 namespace RDF {
72 using namespace ROOT::TypeTraits;
73 using namespace ROOT::VecOps;
74 using namespace ROOT::RDF;
75 using namespace ROOT::Detail::RDF;
76 
77 using Hist_t = ::TH1D;
78 
79 /// The container type for each thread's partial result in an action helper
80 // We have to avoid to instantiate std::vector<bool> as that makes it impossible to return a reference to one of
81 // the thread-local results. In addition, a common definition for the type of the container makes it easy to swap
82 // the type of the underlying container if e.g. we see problems with false sharing of the thread-local results..
83 template <typename T>
84 using Results = typename std::conditional<std::is_same<T, bool>::value, std::deque<T>, std::vector<T>>::type;
85 
86 template <typename F>
87 class ForeachSlotHelper : public RActionImpl<ForeachSlotHelper<F>> {
88  F fCallable;
89 
90 public:
91  using ColumnTypes_t = RemoveFirstParameter_t<typename CallableTraits<F>::arg_types>;
92  ForeachSlotHelper(F &&f) : fCallable(f) {}
93  ForeachSlotHelper(ForeachSlotHelper &&) = default;
94  ForeachSlotHelper(const ForeachSlotHelper &) = delete;
95 
96  void InitTask(TTreeReader *, unsigned int) {}
97 
98  template <typename... Args>
99  void Exec(unsigned int slot, Args &&... args)
100  {
101  // check that the decayed types of Args are the same as the branch types
102  static_assert(std::is_same<TypeList<typename std::decay<Args>::type...>, ColumnTypes_t>::value, "");
103  fCallable(slot, std::forward<Args>(args)...);
104  }
105 
106  void Initialize() { /* noop */}
107 
108  void Finalize() { /* noop */}
109 
110  std::string GetActionName() { return "ForeachSlot"; }
111 };
112 
113 class CountHelper : public RActionImpl<CountHelper> {
114  const std::shared_ptr<ULong64_t> fResultCount;
115  Results<ULong64_t> fCounts;
116 
117 public:
118  using ColumnTypes_t = TypeList<>;
119  CountHelper(const std::shared_ptr<ULong64_t> &resultCount, const unsigned int nSlots);
120  CountHelper(CountHelper &&) = default;
121  CountHelper(const CountHelper &) = delete;
122  void InitTask(TTreeReader *, unsigned int) {}
123  void Exec(unsigned int slot);
124  void Initialize() { /* noop */}
125  void Finalize();
126  ULong64_t &PartialUpdate(unsigned int slot);
127 
128  std::string GetActionName() { return "Count"; }
129 };
130 
131 template <typename ProxiedVal_t>
132 class ReportHelper : public RActionImpl<ReportHelper<ProxiedVal_t>> {
133  const std::shared_ptr<RCutFlowReport> fReport;
134  // Here we have a weak pointer since we need to keep track of the validity
135  // of the proxied node. It can happen that the user does not trigger the
136  // event loop by looking into the RResultPtr and the chain goes out of scope
137  // before the Finalize method is invoked.
138  std::weak_ptr<ProxiedVal_t> fProxiedWPtr;
139  bool fReturnEmptyReport;
140 
141 public:
142  using ColumnTypes_t = TypeList<>;
143  ReportHelper(const std::shared_ptr<RCutFlowReport> &report, const std::shared_ptr<ProxiedVal_t> &pp, bool emptyRep)
144  : fReport(report), fProxiedWPtr(pp), fReturnEmptyReport(emptyRep){};
145  ReportHelper(ReportHelper &&) = default;
146  ReportHelper(const ReportHelper &) = delete;
147  void InitTask(TTreeReader *, unsigned int) {}
148  void Exec(unsigned int /* slot */) {}
149  void Initialize() { /* noop */}
150  void Finalize()
151  {
152  // We need the weak_ptr in order to avoid crashes at tear down
153  if (!fReturnEmptyReport && !fProxiedWPtr.expired())
154  fProxiedWPtr.lock()->Report(*fReport);
155  }
156 
157  std::string GetActionName() { return "Report"; }
158 };
159 
160 class FillHelper : public RActionImpl<FillHelper> {
161  // this sets a total initial size of 16 MB for the buffers (can increase)
162  static constexpr unsigned int fgTotalBufSize = 2097152;
163  using BufEl_t = double;
164  using Buf_t = std::vector<BufEl_t>;
165 
166  std::vector<Buf_t> fBuffers;
167  std::vector<Buf_t> fWBuffers;
168  const std::shared_ptr<Hist_t> fResultHist;
169  unsigned int fNSlots;
170  unsigned int fBufSize;
171  /// Histograms containing "snapshots" of partial results. Non-null only if a registered callback requires it.
172  Results<std::unique_ptr<Hist_t>> fPartialHists;
173  Buf_t fMin;
174  Buf_t fMax;
175 
176  void UpdateMinMax(unsigned int slot, double v);
177 
178 public:
179  FillHelper(const std::shared_ptr<Hist_t> &h, const unsigned int nSlots);
180  FillHelper(FillHelper &&) = default;
181  FillHelper(const FillHelper &) = delete;
182  void InitTask(TTreeReader *, unsigned int) {}
183  void Exec(unsigned int slot, double v);
184  void Exec(unsigned int slot, double v, double w);
185 
186  template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
187  void Exec(unsigned int slot, const T &vs)
188  {
189  auto &thisBuf = fBuffers[slot];
190  for (auto &v : vs) {
191  UpdateMinMax(slot, v);
192  thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
193  }
194  }
195 
196  template <typename T, typename W,
197  typename std::enable_if<IsContainer<T>::value && IsContainer<W>::value, int>::type = 0>
198  void Exec(unsigned int slot, const T &vs, const W &ws)
199  {
200  auto &thisBuf = fBuffers[slot];
201 
202  for (auto &v : vs) {
203  UpdateMinMax(slot, v);
204  thisBuf.emplace_back(v);
205  }
206 
207  auto &thisWBuf = fWBuffers[slot];
208  for (auto &w : ws) {
209  thisWBuf.emplace_back(w); // TODO: Can be optimised in case T == BufEl_t
210  }
211  }
212 
213  template <typename T, typename W,
214  typename std::enable_if<IsContainer<T>::value && !IsContainer<W>::value, int>::type = 0>
215  void Exec(unsigned int slot, const T &vs, const W w)
216  {
217  auto &thisBuf = fBuffers[slot];
218  for (auto &v : vs) {
219  UpdateMinMax(slot, v);
220  thisBuf.emplace_back(v); // TODO: Can be optimised in case T == BufEl_t
221  }
222 
223  auto &thisWBuf = fWBuffers[slot];
224  thisWBuf.insert(thisWBuf.end(), vs.size(), w);
225  }
226 
227  // ROOT-10092: Filling with a scalar as first column and a collection as second is not supported
228  template <typename T, typename W,
229  typename std::enable_if<IsContainer<W>::value && !IsContainer<T>::value, int>::type = 0>
230  void Exec(unsigned int, const T &, const W &)
231  {
232  throw std::runtime_error(
233  "Cannot fill object if the type of the first column is a scalar and the one of the second a container.");
234  }
235 
236  Hist_t &PartialUpdate(unsigned int);
237 
238  void Initialize() { /* noop */}
239 
240  void Finalize();
241 
242  std::string GetActionName() { return "Fill"; }
243 };
244 
245 extern template void FillHelper::Exec(unsigned int, const std::vector<float> &);
246 extern template void FillHelper::Exec(unsigned int, const std::vector<double> &);
247 extern template void FillHelper::Exec(unsigned int, const std::vector<char> &);
248 extern template void FillHelper::Exec(unsigned int, const std::vector<int> &);
249 extern template void FillHelper::Exec(unsigned int, const std::vector<unsigned int> &);
250 extern template void FillHelper::Exec(unsigned int, const std::vector<float> &, const std::vector<float> &);
251 extern template void FillHelper::Exec(unsigned int, const std::vector<double> &, const std::vector<double> &);
252 extern template void FillHelper::Exec(unsigned int, const std::vector<char> &, const std::vector<char> &);
253 extern template void FillHelper::Exec(unsigned int, const std::vector<int> &, const std::vector<int> &);
254 extern template void
255 FillHelper::Exec(unsigned int, const std::vector<unsigned int> &, const std::vector<unsigned int> &);
256 
257 template <typename HIST = Hist_t>
258 class FillParHelper : public RActionImpl<FillParHelper<HIST>> {
259  std::vector<HIST *> fObjects;
260 
261 public:
262  FillParHelper(FillParHelper &&) = default;
263  FillParHelper(const FillParHelper &) = delete;
264 
265  FillParHelper(const std::shared_ptr<HIST> &h, const unsigned int nSlots) : fObjects(nSlots, nullptr)
266  {
267  fObjects[0] = h.get();
268  // Initialise all other slots
269  for (unsigned int i = 1; i < nSlots; ++i) {
270  fObjects[i] = new HIST(*fObjects[0]);
271  if (auto objAsHist = dynamic_cast<TH1*>(fObjects[i])) {
272  objAsHist->SetDirectory(nullptr);
273  }
274  }
275  }
276 
277  void InitTask(TTreeReader *, unsigned int) {}
278 
279  void Exec(unsigned int slot, double x0) // 1D histos
280  {
281  fObjects[slot]->Fill(x0);
282  }
283 
284  void Exec(unsigned int slot, double x0, double x1) // 1D weighted and 2D histos
285  {
286  fObjects[slot]->Fill(x0, x1);
287  }
288 
289  void Exec(unsigned int slot, double x0, double x1, double x2) // 2D weighted and 3D histos
290  {
291  fObjects[slot]->Fill(x0, x1, x2);
292  }
293 
294  void Exec(unsigned int slot, double x0, double x1, double x2, double x3) // 3D weighted histos
295  {
296  fObjects[slot]->Fill(x0, x1, x2, x3);
297  }
298 
299  template <typename X0, typename std::enable_if<IsContainer<X0>::value, int>::type = 0>
300  void Exec(unsigned int slot, const X0 &x0s)
301  {
302  auto thisSlotH = fObjects[slot];
303  for (auto &x0 : x0s) {
304  thisSlotH->Fill(x0); // TODO: Can be optimised in case T == vector<double>
305  }
306  }
307 
308  // ROOT-10092: Filling with a scalar as first column and a collection as second is not supported
309  template <typename X0, typename X1,
310  typename std::enable_if<IsContainer<X1>::value && !IsContainer<X0>::value, int>::type = 0>
311  void Exec(unsigned int , const X0 &, const X1 &)
312  {
313  throw std::runtime_error(
314  "Cannot fill object if the type of the first column is a scalar and the one of the second a container.");
315  }
316 
317  template <typename X0, typename X1,
318  typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value, int>::type = 0>
319  void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
320  {
321  auto thisSlotH = fObjects[slot];
322  if (x0s.size() != x1s.size()) {
323  throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
324  }
325  auto x0sIt = std::begin(x0s);
326  const auto x0sEnd = std::end(x0s);
327  auto x1sIt = std::begin(x1s);
328  for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
329  thisSlotH->Fill(*x0sIt, *x1sIt); // TODO: Can be optimised in case T == vector<double>
330  }
331  }
332 
333  template <typename X0, typename W,
334  typename std::enable_if<IsContainer<X0>::value && !IsContainer<W>::value, int>::type = 0>
335  void Exec(unsigned int slot, const X0 &x0s, const W w)
336  {
337  auto thisSlotH = fObjects[slot];
338  for (auto &&x : x0s) {
339  thisSlotH->Fill(x, w);
340  }
341  }
342 
343  template <typename X0, typename X1, typename X2,
344  typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value,
345  int>::type = 0>
346  void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s)
347  {
348  auto thisSlotH = fObjects[slot];
349  if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size())) {
350  throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
351  }
352  auto x0sIt = std::begin(x0s);
353  const auto x0sEnd = std::end(x0s);
354  auto x1sIt = std::begin(x1s);
355  auto x2sIt = std::begin(x2s);
356  for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++) {
357  thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt); // TODO: Can be optimised in case T == vector<double>
358  }
359  }
360 
361  template <typename X0, typename X1, typename W,
362  typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && !IsContainer<W>::value,
363  int>::type = 0>
364  void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const W w)
365  {
366  auto thisSlotH = fObjects[slot];
367  if (x0s.size() != x1s.size()) {
368  throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
369  }
370  auto x0sIt = std::begin(x0s);
371  const auto x0sEnd = std::end(x0s);
372  auto x1sIt = std::begin(x1s);
373  for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
374  thisSlotH->Fill(*x0sIt, *x1sIt, w); // TODO: Can be optimised in case T == vector<double>
375  }
376  }
377 
378  template <typename X0, typename X1, typename X2, typename X3,
379  typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value &&
380  IsContainer<X3>::value,
381  int>::type = 0>
382  void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s, const X3 &x3s)
383  {
384  auto thisSlotH = fObjects[slot];
385  if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size() && x1s.size() == x3s.size())) {
386  throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
387  }
388  auto x0sIt = std::begin(x0s);
389  const auto x0sEnd = std::end(x0s);
390  auto x1sIt = std::begin(x1s);
391  auto x2sIt = std::begin(x2s);
392  auto x3sIt = std::begin(x3s);
393  for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++, x3sIt++) {
394  thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt, *x3sIt); // TODO: Can be optimised in case T == vector<double>
395  }
396  }
397 
398  template <typename X0, typename X1, typename X2, typename W,
399  typename std::enable_if<IsContainer<X0>::value && IsContainer<X1>::value && IsContainer<X2>::value &&
400  !IsContainer<W>::value,
401  int>::type = 0>
402  void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s, const X2 &x2s, const W w)
403  {
404  auto thisSlotH = fObjects[slot];
405  if (!(x0s.size() == x1s.size() && x1s.size() == x2s.size())) {
406  throw std::runtime_error("Cannot fill histogram with values in containers of different sizes.");
407  }
408  auto x0sIt = std::begin(x0s);
409  const auto x0sEnd = std::end(x0s);
410  auto x1sIt = std::begin(x1s);
411  auto x2sIt = std::begin(x2s);
412  for (; x0sIt != x0sEnd; x0sIt++, x1sIt++, x2sIt++) {
413  thisSlotH->Fill(*x0sIt, *x1sIt, *x2sIt, w);
414  }
415  }
416 
417  void Initialize() { /* noop */}
418 
419  void Finalize()
420  {
421  auto resObj = fObjects[0];
422  const auto nSlots = fObjects.size();
423  TList l;
424  l.SetOwner(); // The list will free the memory associated to its elements upon destruction
425  for (unsigned int slot = 1; slot < nSlots; ++slot) {
426  l.Add(fObjects[slot]);
427  }
428 
429  resObj->Merge(&l);
430  }
431 
432  HIST &PartialUpdate(unsigned int slot) { return *fObjects[slot]; }
433 
434  std::string GetActionName() { return "FillPar"; }
435 };
436 
437 class FillTGraphHelper : public ROOT::Detail::RDF::RActionImpl<FillTGraphHelper> {
438 public:
439  using Result_t = ::TGraph;
440 
441 private:
442  std::vector<::TGraph *> fGraphs;
443 
444 public:
445  FillTGraphHelper(FillTGraphHelper &&) = default;
446  FillTGraphHelper(const FillTGraphHelper &) = delete;
447 
448  // The last parameter is always false, as at the moment there is no way to propagate the parameter from the user to
449  // this method
450  FillTGraphHelper(const std::shared_ptr<::TGraph> &g, const unsigned int nSlots) : fGraphs(nSlots, nullptr)
451  {
452  fGraphs[0] = g.get();
453  // Initialise all other slots
454  for (unsigned int i = 1; i < nSlots; ++i) {
455  fGraphs[i] = new TGraph(*fGraphs[0]);
456  }
457  }
458 
459  void Initialize() {}
460  void InitTask(TTreeReader *, unsigned int) {}
461 
462  template <typename X0, typename X1,
463  typename std::enable_if<
464  ROOT::TypeTraits::IsContainer<X0>::value && ROOT::TypeTraits::IsContainer<X1>::value, int>::type = 0>
465  void Exec(unsigned int slot, const X0 &x0s, const X1 &x1s)
466  {
467  if (x0s.size() != x1s.size()) {
468  throw std::runtime_error("Cannot fill Graph with values in containers of different sizes.");
469  }
470  auto thisSlotG = fGraphs[slot];
471  auto x0sIt = std::begin(x0s);
472  const auto x0sEnd = std::end(x0s);
473  auto x1sIt = std::begin(x1s);
474  for (; x0sIt != x0sEnd; x0sIt++, x1sIt++) {
475  thisSlotG->SetPoint(thisSlotG->GetN(), *x0sIt, *x1sIt);
476  }
477  }
478 
479  template <typename X0, typename X1>
480  void Exec(unsigned int slot, X0 x0, X1 x1)
481  {
482  auto thisSlotG = fGraphs[slot];
483  thisSlotG->SetPoint(thisSlotG->GetN(), x0, x1);
484  }
485 
486  void Finalize()
487  {
488  const auto nSlots = fGraphs.size();
489  auto resGraph = fGraphs[0];
490  TList l;
491  l.SetOwner(); // The list will free the memory associated to its elements upon destruction
492  for (unsigned int slot = 1; slot < nSlots; ++slot) {
493  l.Add(fGraphs[slot]);
494  }
495  resGraph->Merge(&l);
496  }
497 
498  std::string GetActionName() { return "Graph"; }
499 
500  Result_t &PartialUpdate(unsigned int slot) { return *fGraphs[slot]; }
501 };
502 
503 // In case of the take helper we have 4 cases:
504 // 1. The column is not an RVec, the collection is not a vector
505 // 2. The column is not an RVec, the collection is a vector
506 // 3. The column is an RVec, the collection is not a vector
507 // 4. The column is an RVec, the collection is a vector
508 
509 template <typename V, typename COLL>
510 void FillColl(V&& v, COLL& c) {
511  c.emplace_back(v);
512 }
513 
514 // Use push_back for bool since some compilers do not support emplace_back.
515 template <typename COLL>
516 void FillColl(bool v, COLL& c) {
517  c.push_back(v);
518 }
519 
520 // Case 1.: The column is not an RVec, the collection is not a vector
521 // No optimisations, no transformations: just copies.
522 template <typename RealT_t, typename T, typename COLL>
523 class TakeHelper : public RActionImpl<TakeHelper<RealT_t, T, COLL>> {
524  Results<std::shared_ptr<COLL>> fColls;
525 
526 public:
527  using ColumnTypes_t = TypeList<T>;
528  TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
529  {
530  fColls.emplace_back(resultColl);
531  for (unsigned int i = 1; i < nSlots; ++i)
532  fColls.emplace_back(std::make_shared<COLL>());
533  }
534  TakeHelper(TakeHelper &&);
535  TakeHelper(const TakeHelper &) = delete;
536 
537  void InitTask(TTreeReader *, unsigned int) {}
538 
539  void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
540 
541  void Initialize() { /* noop */}
542 
543  void Finalize()
544  {
545  auto rColl = fColls[0];
546  for (unsigned int i = 1; i < fColls.size(); ++i) {
547  const auto &coll = fColls[i];
548  const auto end = coll->end();
549  // Use an explicit loop here to prevent compiler warnings introduced by
550  // clang's range-based loop analysis and vector<bool> references.
551  for (auto j = coll->begin(); j != end; j++) {
552  FillColl(*j, *rColl);
553  }
554  }
555  }
556 
557  COLL &PartialUpdate(unsigned int slot) { return *fColls[slot].get(); }
558 
559  std::string GetActionName() { return "Take"; }
560 };
561 
562 // Case 2.: The column is not an RVec, the collection is a vector
563 // Optimisations, no transformations: just copies.
564 template <typename RealT_t, typename T>
565 class TakeHelper<RealT_t, T, std::vector<T>> : public RActionImpl<TakeHelper<RealT_t, T, std::vector<T>>> {
566  Results<std::shared_ptr<std::vector<T>>> fColls;
567 
568 public:
569  using ColumnTypes_t = TypeList<T>;
570  TakeHelper(const std::shared_ptr<std::vector<T>> &resultColl, const unsigned int nSlots)
571  {
572  fColls.emplace_back(resultColl);
573  for (unsigned int i = 1; i < nSlots; ++i) {
574  auto v = std::make_shared<std::vector<T>>();
575  v->reserve(1024);
576  fColls.emplace_back(v);
577  }
578  }
579  TakeHelper(TakeHelper &&);
580  TakeHelper(const TakeHelper &) = delete;
581 
582  void InitTask(TTreeReader *, unsigned int) {}
583 
584  void Exec(unsigned int slot, T &v) { FillColl(v, *fColls[slot]); }
585 
586  void Initialize() { /* noop */}
587 
588  // This is optimised to treat vectors
589  void Finalize()
590  {
591  ULong64_t totSize = 0;
592  for (auto &coll : fColls)
593  totSize += coll->size();
594  auto rColl = fColls[0];
595  rColl->reserve(totSize);
596  for (unsigned int i = 1; i < fColls.size(); ++i) {
597  auto &coll = fColls[i];
598  rColl->insert(rColl->end(), coll->begin(), coll->end());
599  }
600  }
601 
602  std::vector<T> &PartialUpdate(unsigned int slot) { return *fColls[slot]; }
603 
604  std::string GetActionName() { return "Take"; }
605 };
606 
607 // Case 3.: The column is a RVec, the collection is not a vector
608 // No optimisations, transformations from RVecs to vectors
609 template <typename RealT_t, typename COLL>
610 class TakeHelper<RealT_t, RVec<RealT_t>, COLL> : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, COLL>> {
611  Results<std::shared_ptr<COLL>> fColls;
612 
613 public:
614  using ColumnTypes_t = TypeList<RVec<RealT_t>>;
615  TakeHelper(const std::shared_ptr<COLL> &resultColl, const unsigned int nSlots)
616  {
617  fColls.emplace_back(resultColl);
618  for (unsigned int i = 1; i < nSlots; ++i)
619  fColls.emplace_back(std::make_shared<COLL>());
620  }
621  TakeHelper(TakeHelper &&);
622  TakeHelper(const TakeHelper &) = delete;
623 
624  void InitTask(TTreeReader *, unsigned int) {}
625 
626  void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
627 
628  void Initialize() { /* noop */}
629 
630  void Finalize()
631  {
632  auto rColl = fColls[0];
633  for (unsigned int i = 1; i < fColls.size(); ++i) {
634  auto &coll = fColls[i];
635  for (auto &v : *coll) {
636  rColl->emplace_back(v);
637  }
638  }
639  }
640 
641  std::string GetActionName() { return "Take"; }
642 };
643 
644 // Case 4.: The column is an RVec, the collection is a vector
645 // Optimisations, transformations from RVecs to vectors
646 template <typename RealT_t>
647 class TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>
648  : public RActionImpl<TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>> {
649 
650  Results<std::shared_ptr<std::vector<std::vector<RealT_t>>>> fColls;
651 
652 public:
653  using ColumnTypes_t = TypeList<RVec<RealT_t>>;
654  TakeHelper(const std::shared_ptr<std::vector<std::vector<RealT_t>>> &resultColl, const unsigned int nSlots)
655  {
656  fColls.emplace_back(resultColl);
657  for (unsigned int i = 1; i < nSlots; ++i) {
658  auto v = std::make_shared<std::vector<RealT_t>>();
659  v->reserve(1024);
660  fColls.emplace_back(v);
661  }
662  }
663  TakeHelper(TakeHelper &&);
664  TakeHelper(const TakeHelper &) = delete;
665 
666  void InitTask(TTreeReader *, unsigned int) {}
667 
668  void Exec(unsigned int slot, RVec<RealT_t> av) { fColls[slot]->emplace_back(av.begin(), av.end()); }
669 
670  void Initialize() { /* noop */}
671 
672  // This is optimised to treat vectors
673  void Finalize()
674  {
675  ULong64_t totSize = 0;
676  for (auto &coll : fColls)
677  totSize += coll->size();
678  auto rColl = fColls[0];
679  rColl->reserve(totSize);
680  for (unsigned int i = 1; i < fColls.size(); ++i) {
681  auto &coll = fColls[i];
682  rColl->insert(rColl->end(), coll->begin(), coll->end());
683  }
684  }
685 
686  std::string GetActionName() { return "Take"; }
687 };
688 
689 // Extern templates for TakeHelper
690 // NOTE: The move-constructor of specializations declared as extern templates
691 // must be defined out of line, otherwise cling fails to find its symbol.
692 template <typename RealT_t, typename T, typename COLL>
693 TakeHelper<RealT_t, T, COLL>::TakeHelper(TakeHelper<RealT_t, T, COLL> &&) = default;
694 template <typename RealT_t, typename T>
695 TakeHelper<RealT_t, T, std::vector<T>>::TakeHelper(TakeHelper<RealT_t, T, std::vector<T>> &&) = default;
696 template <typename RealT_t, typename COLL>
697 TakeHelper<RealT_t, RVec<RealT_t>, COLL>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, COLL> &&) = default;
698 template <typename RealT_t>
699 TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>>::TakeHelper(TakeHelper<RealT_t, RVec<RealT_t>, std::vector<RealT_t>> &&) = default;
700 
701 // External templates are disabled for gcc5 since this version wrongly omits the C++11 ABI attribute
702 #if __GNUC__ > 5
703 extern template class TakeHelper<bool, bool, std::vector<bool>>;
704 extern template class TakeHelper<unsigned int, unsigned int, std::vector<unsigned int>>;
705 extern template class TakeHelper<unsigned long, unsigned long, std::vector<unsigned long>>;
706 extern template class TakeHelper<unsigned long long, unsigned long long, std::vector<unsigned long long>>;
707 extern template class TakeHelper<int, int, std::vector<int>>;
708 extern template class TakeHelper<long, long, std::vector<long>>;
709 extern template class TakeHelper<long long, long long, std::vector<long long>>;
710 extern template class TakeHelper<float, float, std::vector<float>>;
711 extern template class TakeHelper<double, double, std::vector<double>>;
712 #endif
713 
714 
715 template <typename ResultType>
716 class MinHelper : public RActionImpl<MinHelper<ResultType>> {
717  const std::shared_ptr<ResultType> fResultMin;
718  Results<ResultType> fMins;
719 
720 public:
721  MinHelper(MinHelper &&) = default;
722  MinHelper(const std::shared_ptr<ResultType> &minVPtr, const unsigned int nSlots)
723  : fResultMin(minVPtr), fMins(nSlots, std::numeric_limits<ResultType>::max())
724  {
725  }
726 
727  void Exec(unsigned int slot, ResultType v) { fMins[slot] = std::min(v, fMins[slot]); }
728 
729  void InitTask(TTreeReader *, unsigned int) {}
730 
731  template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
732  void Exec(unsigned int slot, const T &vs)
733  {
734  for (auto &&v : vs)
735  fMins[slot] = std::min(v, fMins[slot]);
736  }
737 
738  void Initialize() { /* noop */}
739 
740  void Finalize()
741  {
742  *fResultMin = std::numeric_limits<ResultType>::max();
743  for (auto &m : fMins)
744  *fResultMin = std::min(m, *fResultMin);
745  }
746 
747  ResultType &PartialUpdate(unsigned int slot) { return fMins[slot]; }
748 
749  std::string GetActionName() { return "Min"; }
750 };
751 
752 // TODO
753 // extern template void MinHelper::Exec(unsigned int, const std::vector<float> &);
754 // extern template void MinHelper::Exec(unsigned int, const std::vector<double> &);
755 // extern template void MinHelper::Exec(unsigned int, const std::vector<char> &);
756 // extern template void MinHelper::Exec(unsigned int, const std::vector<int> &);
757 // extern template void MinHelper::Exec(unsigned int, const std::vector<unsigned int> &);
758 
759 template <typename ResultType>
760 class MaxHelper : public RActionImpl<MaxHelper<ResultType>> {
761  const std::shared_ptr<ResultType> fResultMax;
762  Results<ResultType> fMaxs;
763 
764 public:
765  MaxHelper(MaxHelper &&) = default;
766  MaxHelper(const MaxHelper &) = delete;
767  MaxHelper(const std::shared_ptr<ResultType> &maxVPtr, const unsigned int nSlots)
768  : fResultMax(maxVPtr), fMaxs(nSlots, std::numeric_limits<ResultType>::lowest())
769  {
770  }
771 
772  void InitTask(TTreeReader *, unsigned int) {}
773  void Exec(unsigned int slot, ResultType v) { fMaxs[slot] = std::max(v, fMaxs[slot]); }
774 
775  template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
776  void Exec(unsigned int slot, const T &vs)
777  {
778  for (auto &&v : vs)
779  fMaxs[slot] = std::max((ResultType)v, fMaxs[slot]);
780  }
781 
782  void Initialize() { /* noop */}
783 
784  void Finalize()
785  {
786  *fResultMax = std::numeric_limits<ResultType>::lowest();
787  for (auto &m : fMaxs) {
788  *fResultMax = std::max(m, *fResultMax);
789  }
790  }
791 
792  ResultType &PartialUpdate(unsigned int slot) { return fMaxs[slot]; }
793 
794  std::string GetActionName() { return "Max"; }
795 };
796 
797 // TODO
798 // extern template void MaxHelper::Exec(unsigned int, const std::vector<float> &);
799 // extern template void MaxHelper::Exec(unsigned int, const std::vector<double> &);
800 // extern template void MaxHelper::Exec(unsigned int, const std::vector<char> &);
801 // extern template void MaxHelper::Exec(unsigned int, const std::vector<int> &);
802 // extern template void MaxHelper::Exec(unsigned int, const std::vector<unsigned int> &);
803 
804 template <typename ResultType>
805 class SumHelper : public RActionImpl<SumHelper<ResultType>> {
806  const std::shared_ptr<ResultType> fResultSum;
807  Results<ResultType> fSums;
808 
809  /// Evaluate neutral element for this type and the sum operation.
810  /// This is assumed to be any_value - any_value if operator- is defined
811  /// for the type, otherwise a default-constructed ResultType{} is used.
812  template <typename T = ResultType>
813  auto NeutralElement(const T &v, int /*overloadresolver*/) -> decltype(v - v)
814  {
815  return v - v;
816  }
817 
818  template <typename T = ResultType, typename Dummy = int>
819  ResultType NeutralElement(const T &, Dummy) // this overload has lower priority thanks to the template arg
820  {
821  return ResultType{};
822  }
823 
824 public:
825  SumHelper(SumHelper &&) = default;
826  SumHelper(const SumHelper &) = delete;
827  SumHelper(const std::shared_ptr<ResultType> &sumVPtr, const unsigned int nSlots)
828  : fResultSum(sumVPtr), fSums(nSlots, NeutralElement(*sumVPtr, -1))
829  {
830  }
831 
832  void InitTask(TTreeReader *, unsigned int) {}
833  void Exec(unsigned int slot, ResultType v) { fSums[slot] += v; }
834 
835  template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
836  void Exec(unsigned int slot, const T &vs)
837  {
838  for (auto &&v : vs)
839  fSums[slot] += static_cast<ResultType>(v);
840  }
841 
842  void Initialize() { /* noop */}
843 
844  void Finalize()
845  {
846  for (auto &m : fSums)
847  *fResultSum += m;
848  }
849 
850  ResultType &PartialUpdate(unsigned int slot) { return fSums[slot]; }
851 
852  std::string GetActionName() { return "Sum"; }
853 };
854 
855 class MeanHelper : public RActionImpl<MeanHelper> {
856  const std::shared_ptr<double> fResultMean;
857  std::vector<ULong64_t> fCounts;
858  std::vector<double> fSums;
859  std::vector<double> fPartialMeans;
860 
861 public:
862  MeanHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
863  MeanHelper(MeanHelper &&) = default;
864  MeanHelper(const MeanHelper &) = delete;
865  void InitTask(TTreeReader *, unsigned int) {}
866  void Exec(unsigned int slot, double v);
867 
868  template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
869  void Exec(unsigned int slot, const T &vs)
870  {
871  for (auto &&v : vs) {
872  fSums[slot] += v;
873  fCounts[slot]++;
874  }
875  }
876 
877  void Initialize() { /* noop */}
878 
879  void Finalize();
880 
881  double &PartialUpdate(unsigned int slot);
882 
883  std::string GetActionName() { return "Mean"; }
884 };
885 
886 extern template void MeanHelper::Exec(unsigned int, const std::vector<float> &);
887 extern template void MeanHelper::Exec(unsigned int, const std::vector<double> &);
888 extern template void MeanHelper::Exec(unsigned int, const std::vector<char> &);
889 extern template void MeanHelper::Exec(unsigned int, const std::vector<int> &);
890 extern template void MeanHelper::Exec(unsigned int, const std::vector<unsigned int> &);
891 
892 class StdDevHelper : public RActionImpl<StdDevHelper> {
893  // Number of subsets of data
894  const unsigned int fNSlots;
895  const std::shared_ptr<double> fResultStdDev;
896  // Number of element for each slot
897  std::vector<ULong64_t> fCounts;
898  // Mean of each slot
899  std::vector<double> fMeans;
900  // Squared distance from the mean
901  std::vector<double> fDistancesfromMean;
902 
903 public:
904  StdDevHelper(const std::shared_ptr<double> &meanVPtr, const unsigned int nSlots);
905  StdDevHelper(StdDevHelper &&) = default;
906  StdDevHelper(const StdDevHelper &) = delete;
907  void InitTask(TTreeReader *, unsigned int) {}
908  void Exec(unsigned int slot, double v);
909 
910  template <typename T, typename std::enable_if<IsContainer<T>::value, int>::type = 0>
911  void Exec(unsigned int slot, const T &vs)
912  {
913  for (auto &&v : vs) {
914  Exec(slot, v);
915  }
916  }
917 
918  void Initialize() { /* noop */}
919 
920  void Finalize();
921 
922  std::string GetActionName() { return "StdDev"; }
923 };
924 
925 extern template void StdDevHelper::Exec(unsigned int, const std::vector<float> &);
926 extern template void StdDevHelper::Exec(unsigned int, const std::vector<double> &);
927 extern template void StdDevHelper::Exec(unsigned int, const std::vector<char> &);
928 extern template void StdDevHelper::Exec(unsigned int, const std::vector<int> &);
929 extern template void StdDevHelper::Exec(unsigned int, const std::vector<unsigned int> &);
930 
931 template <typename PrevNodeType>
932 class DisplayHelper : public RActionImpl<DisplayHelper<PrevNodeType>> {
933 private:
934  using Display_t = ROOT::RDF::RDisplay;
935  const std::shared_ptr<Display_t> fDisplayerHelper;
936  const std::shared_ptr<PrevNodeType> fPrevNode;
937 
938 public:
939  DisplayHelper(const std::shared_ptr<Display_t> &d, const std::shared_ptr<PrevNodeType> &prevNode)
940  : fDisplayerHelper(d), fPrevNode(prevNode)
941  {
942  }
943  DisplayHelper(DisplayHelper &&) = default;
944  DisplayHelper(const DisplayHelper &) = delete;
945  void InitTask(TTreeReader *, unsigned int) {}
946 
947  template <typename... Columns>
948  void Exec(unsigned int, Columns... columns)
949  {
950  fDisplayerHelper->AddRow(columns...);
951  if (!fDisplayerHelper->HasNext()) {
952  fPrevNode->StopProcessing();
953  }
954  }
955 
956  void Initialize() {}
957 
958  void Finalize() {}
959 
960  std::string GetActionName() { return "Display"; }
961 };
962 
963 // std::vector<bool> is special, and not in a good way. As a consequence Snapshot of RVec<bool> needs to be treated
964 // specially. In particular, if RVec<bool> is filled with a (fixed or variable size) boolean array coming from
965 // a ROOT file, when writing out the correspinding branch from a Snapshot we do not have an address to set for the
966 // TTree branch (std::vector<bool> and, consequently, RVec<bool> do not provide a `data()` method).
967 // Bools is a lightweight wrapper around a C array of booleans that is meant to provide a stable address for the
968 // output TTree to read the contents of the snapshotted branches at Fill time.
969 class BoolArray {
970  std::size_t fSize = 0;
971  bool *fBools = nullptr;
972 
973  bool *CopyVector(const RVec<bool> &v)
974  {
975  auto b = new bool[fSize];
976  std::copy(v.begin(), v.end(), b);
977  return b;
978  }
979 
980  bool *CopyArray(bool *o, std::size_t size)
981  {
982  auto b = new bool[size];
983  for (auto i = 0u; i < size; ++i)
984  b[i] = o[i];
985  return b;
986  }
987 
988 public:
989  // this generic constructor could be replaced with a constexpr if in SetBranchesHelper
990  BoolArray() = default;
991  template <typename T>
992  BoolArray(const T &) { throw std::runtime_error("This constructor should never be called"); }
993  BoolArray(const RVec<bool> &v) : fSize(v.size()), fBools(CopyVector(v)) {}
994  BoolArray(const BoolArray &b)
995  {
996  CopyArray(b.fBools, b.fSize);
997  }
998  BoolArray &operator=(const BoolArray &b)
999  {
1000  delete[] fBools;
1001  CopyArray(b.fBools, b.fSize);
1002  return *this;
1003  }
1004  BoolArray(BoolArray &&b)
1005  {
1006  fSize = b.fSize;
1007  fBools = b.fBools;
1008  b.fSize = 0;
1009  b.fBools = nullptr;
1010  }
1011  BoolArray &operator=(BoolArray &&b)
1012  {
1013  delete[] fBools;
1014  fSize = b.fSize;
1015  fBools = b.fBools;
1016  b.fSize = 0;
1017  b.fBools = nullptr;
1018  return *this;
1019  }
1020  ~BoolArray() { delete[] fBools; }
1021  std::size_t Size() const { return fSize; }
1022  bool *Data() { return fBools; }
1023 };
1024 using BoolArrayMap = std::map<std::string, BoolArray>;
1025 
1026 inline bool *UpdateBoolArrayIfBool(BoolArrayMap &boolArrays, RVec<bool> &v, const std::string &outName)
1027 {
1028  // create a boolArrays entry
1029  boolArrays[outName] = BoolArray(v);
1030  return boolArrays[outName].Data();
1031 }
1032 
1033 template <typename T>
1034 T *UpdateBoolArrayIfBool(BoolArrayMap &, RVec<T> &v, const std::string &)
1035 {
1036  return v.data();
1037 }
1038 
1039 // Helper which gets the return value of the data() method if the type is an
1040 // RVec (of anything but a bool), nullptr otherwise.
1041 inline void *GetData(ROOT::VecOps::RVec<bool> & /*v*/)
1042 {
1043  return nullptr;
1044 }
1045 
1046 template <typename T>
1047 void *GetData(ROOT::VecOps::RVec<T> &v)
1048 {
1049  return v.data();
1050 }
1051 
1052 template <typename T>
1053 void *GetData(T & /*v*/)
1054 {
1055  return nullptr;
1056 }
1057 
1058 
1059 template <typename T>
1060 void SetBranchesHelper(BoolArrayMap &, TTree * /*inputTree*/, TTree &outputTree, const std::string & /*validName*/,
1061  const std::string &name, TBranch *& branch, void *& branchAddress, T *address)
1062 {
1063  outputTree.Branch(name.c_str(), address);
1064  branch = nullptr;
1065  branchAddress = nullptr;
1066 }
1067 
1068 /// Helper function for SnapshotHelper and SnapshotHelperMT. It creates new branches for the output TTree of a Snapshot.
1069 /// This overload is called for columns of type `RVec<T>`. For RDF, these can represent:
1070 /// 1. c-style arrays in ROOT files, so we are sure that there are input trees to which we can ask the correct branch title
1071 /// 2. RVecs coming from a custom column or a source
1072 /// 3. vectors coming from ROOT files
1073 /// In case of 1., we save the pointer to the branch and the pointer to the input value. In case of 2. and 3. we save
1074 /// nullptrs.
1075 template <typename T>
1076 void SetBranchesHelper(BoolArrayMap &boolArrays, TTree *inputTree, TTree &outputTree, const std::string &inName,
1077  const std::string &outName, TBranch *&branch, void *&branchAddress, RVec<T> *ab)
1078 {
1079  auto *const inputBranch = inputTree ? inputTree->GetBranch(inName.c_str()) : nullptr;
1080  const auto mustWriteStdVec =
1081  !inputBranch || ROOT::ESTLType::kSTLvector == TClassEdit::IsSTLCont(inputBranch->GetClassName());
1082 
1083  if (mustWriteStdVec) {
1084  // Treat 2. and 3.:
1085  // 2. RVec coming from a custom column or a source
1086  // 3. RVec coming from a column on disk of type vector (the RVec is adopting the data of that vector)
1087  outputTree.Branch(outName.c_str(), &ab->AsVector());
1088  return;
1089  }
1090 
1091  // Treat 1, the C-array case
1092  auto *const leaf = static_cast<TLeaf *>(inputBranch->GetListOfLeaves()->UncheckedAt(0));
1093  const auto bname = leaf->GetName();
1094  const auto counterStr =
1095  leaf->GetLeafCount() ? std::string(leaf->GetLeafCount()->GetName()) : std::to_string(leaf->GetLenStatic());
1096  const auto btype = leaf->GetTypeName();
1097  const auto rootbtype = TypeName2ROOTTypeName(btype);
1098  const auto leaflist = std::string(bname) + "[" + counterStr + "]/" + rootbtype;
1099 
1100  /// RVec<bool> is special because std::vector<bool> is special. In particular, it has no `data()`,
1101  /// so we need to explicitly manage storage of the data that the tree needs to Fill branches with.
1102  auto dataPtr = UpdateBoolArrayIfBool(boolArrays, *ab, outName);
1103 
1104  auto *const outputBranch = outputTree.Branch(outName.c_str(), dataPtr, leaflist.c_str());
1105  outputBranch->SetTitle(inputBranch->GetTitle());
1106 
1107  // Record the branch ptr and the address associated to it if this is not a bool array
1108  if (!std::is_same<bool, T>::value) {
1109  branch = outputBranch;
1110  branchAddress = GetData(*ab);
1111  }
1112 }
1113 
1114 // generic version, no-op
1115 template <typename T>
1116 void UpdateBoolArray(BoolArrayMap &, T&, const std::string &, TTree &) {}
1117 
1118 // RVec<bool> overload, update boolArrays if needed
1119 inline void UpdateBoolArray(BoolArrayMap &boolArrays, RVec<bool> &v, const std::string &outName, TTree &t)
1120 {
1121  if (v.size() > boolArrays[outName].Size()) {
1122  boolArrays[outName] = BoolArray(v); // resize and copy
1123  t.SetBranchAddress(outName.c_str(), boolArrays[outName].Data());
1124  }
1125  else {
1126  std::copy(v.begin(), v.end(), boolArrays[outName].Data()); // just copy
1127  }
1128 }
1129 
1130 /// Helper object for a single-thread Snapshot action
1131 template <typename... BranchTypes>
1132 class SnapshotHelper : public RActionImpl<SnapshotHelper<BranchTypes...>> {
1133  const std::string fFileName;
1134  const std::string fDirName;
1135  const std::string fTreeName;
1136  const RSnapshotOptions fOptions;
1137  std::unique_ptr<TFile> fOutputFile;
1138  std::unique_ptr<TTree> fOutputTree; // must be a ptr because TTrees are not copy/move constructible
1139  bool fIsFirstEvent{true};
1140  const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1141  const ColumnNames_t fOutputBranchNames;
1142  TTree *fInputTree = nullptr; // Current input tree. Set at initialization time (`InitTask`)
1143  BoolArrayMap fBoolArrays; // Storage for C arrays of bools to be written out
1144  std::vector<TBranch *> fBranches; // Addresses of branches in output, non-null only for the ones holding C arrays
1145  std::vector<void *> fBranchAddresses; // Addresses associated to output branches, non-null only for the ones holding C arrays
1146 
1147 public:
1148  using ColumnTypes_t = TypeList<BranchTypes...>;
1149  SnapshotHelper(std::string_view filename, std::string_view dirname, std::string_view treename,
1150  const ColumnNames_t &vbnames, const ColumnNames_t &bnames, const RSnapshotOptions &options)
1151  : fFileName(filename), fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1152  fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fBranches(vbnames.size(), nullptr),
1153  fBranchAddresses(vbnames.size(), nullptr)
1154  {
1155  }
1156 
1157  SnapshotHelper(const SnapshotHelper &) = delete;
1158  SnapshotHelper(SnapshotHelper &&) = default;
1159 
1160  void InitTask(TTreeReader *r, unsigned int /* slot */)
1161  {
1162  if (!r) // empty source, nothing to do
1163  return;
1164  fInputTree = r->GetTree();
1165  // AddClone guarantees that if the input file changes the branches of the output tree are updated with the new
1166  // addresses of the branch values
1167  fInputTree->AddClone(fOutputTree.get());
1168  }
1169 
1170  void Exec(unsigned int /* slot */, BranchTypes &... values)
1171  {
1172  using ind_t = std::index_sequence_for<BranchTypes...>;
1173  if (! fIsFirstEvent) {
1174  UpdateCArraysPtrs(values..., ind_t{});
1175  } else {
1176  SetBranches(values..., ind_t{});
1177  fIsFirstEvent = false;
1178  }
1179  UpdateBoolArrays(values..., ind_t{});
1180  fOutputTree->Fill();
1181  }
1182 
1183  template <std::size_t... S>
1184  void UpdateCArraysPtrs(BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1185  {
1186  // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1187  // associated to those is re-allocated. As a result the value of the pointer can change therewith
1188  // leaving associated to the branch of the output tree an invalid pointer.
1189  // With this code, we set the value of the pointer in the output branch anew when needed.
1190  // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1191  // we need an int for the expander list.
1192  int expander[] = {(fBranches[S] && fBranchAddresses[S] != GetData(values)
1193  ? fBranches[S]->SetAddress(GetData(values)),
1194  fBranchAddresses[S] = GetData(values), 0 : 0, 0)...,
1195  0};
1196  (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1197  }
1198 
1199  template <std::size_t... S>
1200  void SetBranches(BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1201  {
1202  // create branches in output tree (and fill fBoolArrays for RVec<bool> columns)
1203  int expander[] = {(SetBranchesHelper(fBoolArrays, fInputTree, *fOutputTree, fInputBranchNames[S],
1204  fOutputBranchNames[S], fBranches[S], fBranchAddresses[S], &values),
1205  0)...,
1206  0};
1207  (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1208  }
1209 
1210  template <std::size_t... S>
1211  void UpdateBoolArrays(BranchTypes &...values, std::index_sequence<S...> /*dummy*/)
1212  {
1213  int expander[] = {(UpdateBoolArray(fBoolArrays, values, fOutputBranchNames[S], *fOutputTree), 0)..., 0};
1214  (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1215  }
1216 
1217  void Initialize()
1218  {
1219  fOutputFile.reset(
1220  TFile::Open(fFileName.c_str(), fOptions.fMode.c_str(), /*ftitle=*/"",
1221  ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel)));
1222 
1223  if (!fDirName.empty()) {
1224  fOutputFile->mkdir(fDirName.c_str());
1225  fOutputFile->cd(fDirName.c_str());
1226  }
1227 
1228  fOutputTree =
1229  std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/fOutputFile.get());
1230 
1231  if (fOptions.fAutoFlush)
1232  fOutputTree->SetAutoFlush(fOptions.fAutoFlush);
1233  }
1234 
1235  void Finalize()
1236  {
1237  if (fOutputFile && fOutputTree) {
1238  ::TDirectory::TContext ctxt(fOutputFile->GetDirectory(fDirName.c_str()));
1239  fOutputTree->Write();
1240  // must destroy the TTree first, otherwise TFile will delete it too leading to a double delete
1241  fOutputTree.reset();
1242  fOutputFile->Close();
1243  } else {
1244  Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1245  }
1246  }
1247 
1248  std::string GetActionName() { return "Snapshot"; }
1249 };
1250 
1251 /// Helper object for a multi-thread Snapshot action
1252 template <typename... BranchTypes>
1253 class SnapshotHelperMT : public RActionImpl<SnapshotHelperMT<BranchTypes...>> {
1254  const unsigned int fNSlots;
1255  std::unique_ptr<ROOT::Experimental::TBufferMerger> fMerger; // must use a ptr because TBufferMerger is not movable
1256  std::vector<std::shared_ptr<ROOT::Experimental::TBufferMergerFile>> fOutputFiles;
1257  std::vector<std::unique_ptr<TTree>> fOutputTrees;
1258  std::vector<int> fIsFirstEvent; // vector<bool> does not allow concurrent writing of different elements
1259  const std::string fFileName; // name of the output file name
1260  const std::string fDirName; // name of TFile subdirectory in which output must be written (possibly empty)
1261  const std::string fTreeName; // name of output tree
1262  const RSnapshotOptions fOptions; // struct holding options to pass down to TFile and TTree in this action
1263  const ColumnNames_t fInputBranchNames; // This contains the resolved aliases
1264  const ColumnNames_t fOutputBranchNames;
1265  std::vector<TTree *> fInputTrees; // Current input trees. Set at initialization time (`InitTask`)
1266  std::vector<BoolArrayMap> fBoolArrays; // Per-thread storage for C arrays of bools to be written out
1267  // Addresses of branches in output per slot, non-null only for the ones holding C arrays
1268  std::vector<std::vector<TBranch *>> fBranches;
1269  // Addresses associated to output branches per slot, non-null only for the ones holding C arrays
1270  std::vector<std::vector<void *>> fBranchAddresses;
1271 
1272 public:
1273  using ColumnTypes_t = TypeList<BranchTypes...>;
1274  SnapshotHelperMT(const unsigned int nSlots, std::string_view filename, std::string_view dirname,
1275  std::string_view treename, const ColumnNames_t &vbnames, const ColumnNames_t &bnames,
1276  const RSnapshotOptions &options)
1277  : fNSlots(nSlots), fOutputFiles(fNSlots), fOutputTrees(fNSlots), fIsFirstEvent(fNSlots, 1), fFileName(filename),
1278  fDirName(dirname), fTreeName(treename), fOptions(options), fInputBranchNames(vbnames),
1279  fOutputBranchNames(ReplaceDotWithUnderscore(bnames)), fInputTrees(fNSlots), fBoolArrays(fNSlots),
1280  fBranches(fNSlots, std::vector<TBranch *>(vbnames.size(), nullptr)),
1281  fBranchAddresses(fNSlots, std::vector<void *>(vbnames.size(), nullptr))
1282  {
1283  TString checkupdate = fOptions.fMode;
1284  checkupdate.ToLower();
1285  if (checkupdate == "update") {
1286  throw std::invalid_argument("Snapshot: fMode == \"update\" not supported when implicit MT is enabled");
1287  }
1288  }
1289  SnapshotHelperMT(const SnapshotHelperMT &) = delete;
1290  SnapshotHelperMT(SnapshotHelperMT &&) = default;
1291 
1292  void InitTask(TTreeReader *r, unsigned int slot)
1293  {
1294  ::TDirectory::TContext c; // do not let tasks change the thread-local gDirectory
1295  if (!fOutputFiles[slot]) {
1296  // first time this thread executes something, let's create a TBufferMerger output directory
1297  fOutputFiles[slot] = fMerger->GetFile();
1298  }
1299  TDirectory *treeDirectory = fOutputFiles[slot].get();
1300  if (!fDirName.empty()) {
1301  // call returnExistingDirectory=true since MT can end up making this call multiple times
1302  treeDirectory = fOutputFiles[slot]->mkdir(fDirName.c_str(), "", true);
1303  }
1304  // re-create output tree as we need to create its branches again, with new input variables
1305  // TODO we could instead create the output tree and its branches, change addresses of input variables in each task
1306  fOutputTrees[slot] =
1307  std::make_unique<TTree>(fTreeName.c_str(), fTreeName.c_str(), fOptions.fSplitLevel, /*dir=*/treeDirectory);
1308  fOutputTrees[slot]->SetBit(TTree::kEntriesReshuffled);
1309  // TODO can be removed when RDF supports interleaved TBB task execution properly, see ROOT-10269
1310  fOutputTrees[slot]->SetImplicitMT(false);
1311  if (fOptions.fAutoFlush)
1312  fOutputTrees[slot]->SetAutoFlush(fOptions.fAutoFlush);
1313  if (r) {
1314  // not an empty-source RDF
1315  fInputTrees[slot] = r->GetTree();
1316  // AddClone guarantees that if the input file changes the branches of the output tree are updated with the new
1317  // addresses of the branch values. We need this in case of friend trees with different cluster granularity
1318  // than the main tree.
1319  // FIXME: AddClone might result in many many (safe) warnings printed by TTree::CopyAddresses, see ROOT-9487.
1320  const auto friendsListPtr = fInputTrees[slot]->GetListOfFriends();
1321  if (friendsListPtr && friendsListPtr->GetEntries() > 0)
1322  fInputTrees[slot]->AddClone(fOutputTrees[slot].get());
1323  }
1324  fIsFirstEvent[slot] = 1; // reset first event flag for this slot
1325  }
1326 
1327  void FinalizeTask(unsigned int slot)
1328  {
1329  if (fOutputTrees[slot]->GetEntries() > 0)
1330  fOutputFiles[slot]->Write();
1331  // clear now to avoid concurrent destruction of output trees and input tree (which has them listed as fClones)
1332  fOutputTrees[slot].reset(nullptr);
1333  }
1334 
1335  void Exec(unsigned int slot, BranchTypes &... values)
1336  {
1337  using ind_t = std::index_sequence_for<BranchTypes...>;
1338  if (!fIsFirstEvent[slot]) {
1339  UpdateCArraysPtrs(slot, values..., ind_t{});
1340  } else {
1341  SetBranches(slot, values..., ind_t{});
1342  fIsFirstEvent[slot] = 0;
1343  }
1344  UpdateBoolArrays(slot, values..., ind_t{});
1345  fOutputTrees[slot]->Fill();
1346  auto entries = fOutputTrees[slot]->GetEntries();
1347  auto autoFlush = fOutputTrees[slot]->GetAutoFlush();
1348  if ((autoFlush > 0) && (entries % autoFlush == 0))
1349  fOutputFiles[slot]->Write();
1350  }
1351 
1352  template <std::size_t... S>
1353  void UpdateCArraysPtrs(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1354  {
1355  // This code deals with branches which hold C arrays of variable size. It can happen that the buffers
1356  // associated to those is re-allocated. As a result the value of the pointer can change therewith
1357  // leaving associated to the branch of the output tree an invalid pointer.
1358  // With this code, we set the value of the pointer in the output branch anew when needed.
1359  // Nota bene: the extra ",0" after the invocation of SetAddress, is because that method returns void and
1360  // we need an int for the expander list.
1361  (void)slot; // avoid bogus 'unused parameter' warning
1362  int expander[] = {(fBranches[slot][S] && fBranchAddresses[slot][S] != GetData(values)
1363  ? fBranches[slot][S]->SetAddress(GetData(values)),
1364  fBranchAddresses[slot][S] = GetData(values), 0 : 0, 0)...,
1365  0};
1366  (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1367  }
1368 
1369  template <std::size_t... S>
1370  void SetBranches(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1371  {
1372  // hack to call TTree::Branch on all variadic template arguments
1373  int expander[] = {
1374  (SetBranchesHelper(fBoolArrays[slot], fInputTrees[slot], *fOutputTrees[slot], fInputBranchNames[S],
1375  fOutputBranchNames[S], fBranches[slot][S], fBranchAddresses[slot][S], &values),
1376  0)...,
1377  0};
1378  (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1379  (void)slot; // avoid unused variable warnings in gcc6.2
1380  }
1381 
1382  template <std::size_t... S>
1383  void UpdateBoolArrays(unsigned int slot, BranchTypes &... values, std::index_sequence<S...> /*dummy*/)
1384  {
1385  (void)slot; // avoid bogus 'unused parameter' warning
1386  int expander[] = {
1387  (UpdateBoolArray(fBoolArrays[slot], values, fOutputBranchNames[S], *fOutputTrees[slot]), 0)..., 0};
1388  (void)expander; // avoid unused variable warnings for older compilers such as gcc 4.9
1389  }
1390 
1391  void Initialize()
1392  {
1393  const auto cs = ROOT::CompressionSettings(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
1394  fMerger = std::make_unique<ROOT::Experimental::TBufferMerger>(fFileName.c_str(), fOptions.fMode.c_str(), cs);
1395  }
1396 
1397  void Finalize()
1398  {
1399  auto fileWritten = false;
1400  for (auto &file : fOutputFiles) {
1401  if (file) {
1402  file->Write();
1403  file->Close();
1404  fileWritten = true;
1405  }
1406  }
1407 
1408  if (!fileWritten) {
1409  Warning("Snapshot", "A lazy Snapshot action was booked but never triggered.");
1410  }
1411 
1412  // flush all buffers to disk by destroying the TBufferMerger
1413  fOutputFiles.clear();
1414  fMerger.reset();
1415  }
1416 
1417  std::string GetActionName() { return "Snapshot"; }
1418 };
1419 
1420 template <typename Acc, typename Merge, typename R, typename T, typename U,
1421  bool MustCopyAssign = std::is_same<R, U>::value>
1422 class AggregateHelper : public RActionImpl<AggregateHelper<Acc, Merge, R, T, U, MustCopyAssign>> {
1423  Acc fAggregate;
1424  Merge fMerge;
1425  const std::shared_ptr<U> fResult;
1426  Results<U> fAggregators;
1427 
1428 public:
1429  using ColumnTypes_t = TypeList<T>;
1430  AggregateHelper(Acc &&f, Merge &&m, const std::shared_ptr<U> &result, const unsigned int nSlots)
1431  : fAggregate(std::move(f)), fMerge(std::move(m)), fResult(result), fAggregators(nSlots, *result)
1432  {
1433  }
1434  AggregateHelper(AggregateHelper &&) = default;
1435  AggregateHelper(const AggregateHelper &) = delete;
1436 
1437  void InitTask(TTreeReader *, unsigned int) {}
1438 
1439  template <bool MustCopyAssign_ = MustCopyAssign, typename std::enable_if<MustCopyAssign_, int>::type = 0>
1440  void Exec(unsigned int slot, const T &value)
1441  {
1442  fAggregators[slot] = fAggregate(fAggregators[slot], value);
1443  }
1444 
1445  template <bool MustCopyAssign_ = MustCopyAssign, typename std::enable_if<!MustCopyAssign_, int>::type = 0>
1446  void Exec(unsigned int slot, const T &value)
1447  {
1448  fAggregate(fAggregators[slot], value);
1449  }
1450 
1451  void Initialize() { /* noop */}
1452 
1453  template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1454  bool MergeAll = std::is_same<void, MergeRet>::value>
1455  typename std::enable_if<MergeAll, void>::type Finalize()
1456  {
1457  fMerge(fAggregators);
1458  *fResult = fAggregators[0];
1459  }
1460 
1461  template <typename MergeRet = typename CallableTraits<Merge>::ret_type,
1462  bool MergeTwoByTwo = std::is_same<U, MergeRet>::value>
1463  typename std::enable_if<MergeTwoByTwo, void>::type Finalize(...) // ... needed to let compiler distinguish overloads
1464  {
1465  for (const auto &acc : fAggregators)
1466  *fResult = fMerge(*fResult, acc);
1467  }
1468 
1469  U &PartialUpdate(unsigned int slot) { return fAggregators[slot]; }
1470 
1471  std::string GetActionName() { return "Aggregate"; }
1472 };
1473 
1474 } // end of NS RDF
1475 } // end of NS Internal
1476 } // end of NS ROOT
1477 
1478 /// \endcond
1479 
1480 #endif