Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RInterface.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RDF_TINTERFACE
12 #define ROOT_RDF_TINTERFACE
13 
14 #include "ROOT/RDataSource.hxx"
17 #include "ROOT/RDF/HistoModels.hxx"
19 #include "ROOT/RDF/RRange.hxx"
20 #include "ROOT/RDF/Utils.hxx"
22 #include "ROOT/RDF/RLazyDSImpl.hxx"
23 #include "ROOT/RResultPtr.hxx"
25 #include "ROOT/RStringView.hxx"
26 #include "ROOT/TypeTraits.hxx"
27 #include "RtypesCore.h" // for ULong64_t
28 #include "TH1.h" // For Histo actions
29 #include "TH2.h" // For Histo actions
30 #include "TH3.h" // For Histo actions
31 #include "TProfile.h"
32 #include "TProfile2D.h"
33 #include "TStatistic.h"
34 
35 #include <algorithm>
36 #include <cstddef>
37 #include <initializer_list>
38 #include <limits>
39 #include <memory>
40 #include <sstream>
41 #include <stdexcept>
42 #include <string>
43 #include <type_traits> // is_same, enable_if
44 #include <typeinfo>
45 #include <vector>
46 
47 class TGraph;
48 
49 // Windows requires a forward decl of printValue to accept it as a valid friend function in RInterface
50 namespace ROOT {
51 void DisableImplicitMT();
52 bool IsImplicitMTEnabled();
53 void EnableImplicitMT(UInt_t numthreads);
54 class RDataFrame;
55 namespace Internal {
56 namespace RDF {
57 class GraphCreatorHelper;
58 }
59 }
60 }
61 namespace cling {
62 std::string printValue(ROOT::RDataFrame *tdf);
63 }
64 
65 namespace ROOT {
66 namespace RDF {
67 namespace RDFDetail = ROOT::Detail::RDF;
68 namespace RDFInternal = ROOT::Internal::RDF;
69 namespace TTraits = ROOT::TypeTraits;
70 
71 template <typename Proxied, typename DataSource>
72 class RInterface;
73 
74 using RNode = RInterface<::ROOT::Detail::RDF::RNodeBase, void>;
75 
76 // clang-format off
77 /**
78  * \class ROOT::RDF::RInterface
79  * \ingroup dataframe
80  * \brief The public interface to the RDataFrame federation of classes
81  * \tparam Proxied One of the "node" base types (e.g. RLoopManager, RFilterBase). The user never specifies this type manually.
82  * \tparam DataSource The type of the RDataSource which is providing the data to the data frame. There is no source by default.
83  *
84  * The documentation of each method features a one liner illustrating how to use the method, for example showing how
85  * the majority of the template parameters are automatically deduced requiring no or very little effort by the user.
86  */
87 // clang-format on
88 template <typename Proxied, typename DataSource = void>
89 class RInterface {
90  using DS_t = DataSource;
91  using ColumnNames_t = RDFDetail::ColumnNames_t;
92  using RFilterBase = RDFDetail::RFilterBase;
93  using RRangeBase = RDFDetail::RRangeBase;
94  using RLoopManager = RDFDetail::RLoopManager;
95  friend std::string cling::printValue(::ROOT::RDataFrame *tdf); // For a nice printing at the prompt
96  friend class RDFInternal::GraphDrawing::GraphCreatorHelper;
97 
98  template <typename T, typename W>
99  friend class RInterface;
100 
101  std::shared_ptr<Proxied> fProxiedPtr; ///< Smart pointer to the graph node encapsulated by this RInterface.
102  ///< The RLoopManager at the root of this computation graph. Never null.
103  RLoopManager *fLoopManager;
104  /// Non-owning pointer to a data-source object. Null if no data-source. RLoopManager has ownership of the object.
105  RDataSource *fDataSource = nullptr;
106 
107  /// Contains the custom columns defined up to this node.
108  RDFInternal::RBookedCustomColumns fCustomColumns;
109 
110 public:
111  ////////////////////////////////////////////////////////////////////////////
112  /// \brief Copy-assignment operator for RInterface.
113  RInterface &operator=(const RInterface &) = default;
114 
115  ////////////////////////////////////////////////////////////////////////////
116  /// \brief Copy-ctor for RInterface.
117  RInterface(const RInterface &) = default;
118 
119  ////////////////////////////////////////////////////////////////////////////
120  /// \brief Move-ctor for RInterface.
121  RInterface(RInterface &&) = default;
122 
123  ////////////////////////////////////////////////////////////////////////////
124  /// \brief Only enabled when building a RInterface<RLoopManager>
125  template <typename T = Proxied, typename std::enable_if<std::is_same<T, RLoopManager>::value, int>::type = 0>
126  RInterface(const std::shared_ptr<Proxied> &proxied)
127  : fProxiedPtr(proxied), fLoopManager(proxied.get()), fDataSource(proxied->GetDataSource())
128  {
129  AddDefaultColumns();
130  }
131 
132  ////////////////////////////////////////////////////////////////////////////
133  /// \brief Cast any RDataFrame node to a common type ROOT::RDF::RNode.
134  /// Different RDataFrame methods return different C++ types. All nodes, however,
135  /// can be cast to this common type at the cost of a small performance penalty.
136  /// This allows, for example, storing RDataFrame nodes in a vector, or passing them
137  /// around via (non-template, C++11) helper functions.
138  /// Example usage:
139  /// ~~~{.cpp}
140  /// // a function that conditionally adds a Range to a RDataFrame node.
141  /// RNode MaybeAddRange(RNode df, bool mustAddRange)
142  /// {
143  /// return mustAddRange ? df.Range(1) : df;
144  /// }
145  /// // use as :
146  /// ROOT::RDataFrame df(10);
147  /// auto maybeRanged = MaybeAddRange(df, true);
148  /// ~~~
149  /// Note that it is not a problem to pass RNode's by value.
150  operator RNode() const
151  {
152  return RNode(std::static_pointer_cast<::ROOT::Detail::RDF::RNodeBase>(fProxiedPtr), *fLoopManager, fCustomColumns,
153  fDataSource);
154  }
155 
156  ////////////////////////////////////////////////////////////////////////////
157  /// \brief Append a filter to the call graph.
158  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
159  /// signalling whether the event has passed the selection (true) or not (false).
160  /// \param[in] columns Names of the columns/branches in input to the filter function.
161  /// \param[in] name Optional name of this filter. See `Report`.
162  /// \return the filter node of the computation graph.
163  ///
164  /// Append a filter node at the point of the call graph corresponding to the
165  /// object this method is called on.
166  /// The callable `f` should not have side-effects (e.g. modification of an
167  /// external or static variable) to ensure correct results when implicit
168  /// multi-threading is active.
169  ///
170  /// RDataFrame only evaluates filters when necessary: if multiple filters
171  /// are chained one after another, they are executed in order and the first
172  /// one returning false causes the event to be discarded.
173  /// Even if multiple actions or transformations depend on the same filter,
174  /// it is executed once per entry. If its result is requested more than
175  /// once, the cached result is served.
176  ///
177  /// ### Example usage:
178  /// ~~~{.cpp}
179  /// // C++ callable (function, functor class, lambda...) that takes two parameters of the types of "x" and "y"
180  /// auto filtered = df.Filter(myCut, {"x", "y"});
181  ///
182  /// // String: it must contain valid C++ except that column names can be used instead of variable names
183  /// auto filtered = df.Filter("x*y > 0");
184  /// ~~~
185  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
186  RInterface<RDFDetail::RFilter<F, Proxied>, DS_t>
187  Filter(F f, const ColumnNames_t &columns = {}, std::string_view name = "")
188  {
189  RDFInternal::CheckFilter(f);
190  using ColTypes_t = typename TTraits::CallableTraits<F>::arg_types;
191  constexpr auto nColumns = ColTypes_t::list_size;
192  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
193  const auto newColumns =
194  CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
195 
196  using F_t = RDFDetail::RFilter<F, Proxied>;
197 
198  auto filterPtr = std::make_shared<F_t>(std::move(f), validColumnNames, fProxiedPtr, newColumns, name);
199  fLoopManager->Book(filterPtr.get());
200  return RInterface<F_t, DS_t>(std::move(filterPtr), *fLoopManager, newColumns, fDataSource);
201  }
202 
203  ////////////////////////////////////////////////////////////////////////////
204  /// \brief Append a filter to the call graph.
205  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
206  /// signalling whether the event has passed the selection (true) or not (false).
207  /// \param[in] name Optional name of this filter. See `Report`.
208  /// \return the filter node of the computation graph.
209  ///
210  /// Refer to the first overload of this method for the full documentation.
211  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
212  RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, std::string_view name)
213  {
214  // The sfinae is there in order to pick up the overloaded method which accepts two strings
215  // rather than this template method.
216  return Filter(f, {}, name);
217  }
218 
219  ////////////////////////////////////////////////////////////////////////////
220  /// \brief Append a filter to the call graph.
221  /// \param[in] f Function, lambda expression, functor class or any other callable object. It must return a `bool`
222  /// signalling whether the event has passed the selection (true) or not (false).
223  /// \param[in] columns Names of the columns/branches in input to the filter function.
224  /// \return the filter node of the computation graph.
225  ///
226  /// Refer to the first overload of this method for the full documentation.
227  template <typename F>
228  RInterface<RDFDetail::RFilter<F, Proxied>, DS_t> Filter(F f, const std::initializer_list<std::string> &columns)
229  {
230  return Filter(f, ColumnNames_t{columns});
231  }
232 
233  ////////////////////////////////////////////////////////////////////////////
234  /// \brief Append a filter to the call graph.
235  /// \param[in] expression The filter expression in C++
236  /// \param[in] name Optional name of this filter. See `Report`.
237  /// \return the filter node of the computation graph.
238  ///
239  /// The expression is just-in-time compiled and used to filter entries. It must
240  /// be valid C++ syntax in which variable names are substituted with the names
241  /// of branches/columns.
242  ///
243  /// ### Example usage:
244  /// ~~~{.cpp}
245  /// auto filtered_df = df.Filter("myCollection.size() > 3");
246  /// auto filtered_name_df = df.Filter("myCollection.size() > 3", "Minumum collection size");
247  /// ~~~
248  RInterface<RDFDetail::RJittedFilter, DS_t> Filter(std::string_view expression, std::string_view name = "")
249  {
250  // deleted by the jitted call to JitFilterHelper
251  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
252  using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
253  RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
254  const auto jittedFilter = std::make_shared<RDFDetail::RJittedFilter>(fLoopManager, name);
255 
256  RDFInternal::BookFilterJit(jittedFilter.get(), upcastNodeOnHeap, name, expression, fLoopManager->GetAliasMap(),
257  fLoopManager->GetBranchNames(), fCustomColumns, fLoopManager->GetTree(), fDataSource,
258  fLoopManager->GetID());
259 
260  fLoopManager->Book(jittedFilter.get());
261  return RInterface<RDFDetail::RJittedFilter, DS_t>(std::move(jittedFilter), *fLoopManager, fCustomColumns,
262  fDataSource);
263  }
264 
265  // clang-format off
266  ////////////////////////////////////////////////////////////////////////////
267  /// \brief Creates a custom column
268  /// \param[in] name The name of the custom column.
269  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
270  /// \param[in] columns Names of the columns/branches in input to the producer function.
271  /// \return the first node of the computation graph for which the new quantity is defined.
272  ///
273  /// Create a custom column that will be visible from all subsequent nodes
274  /// of the functional chain. The `expression` is only evaluated for entries that pass
275  /// all the preceding filters.
276  /// A new variable is created called `name`, accessible as if it was contained
277  /// in the dataset from subsequent transformations/actions.
278  ///
279  /// Use cases include:
280  /// * caching the results of complex calculations for easy and efficient multiple access
281  /// * extraction of quantities of interest from complex objects
282  ///
283  /// An exception is thrown if the name of the new column is already in use in this branch of the computation graph.
284  ///
285  /// ### Example usage:
286  /// ~~~{.cpp}
287  /// // assuming a function with signature:
288  /// double myComplexCalculation(const RVec<float> &muon_pts);
289  /// // we can pass it directly to Define
290  /// auto df_with_define = df.Define("newColumn", myComplexCalculation, {"muon_pts"});
291  /// // alternatively, we can pass the body of the function as a string, as in Filter:
292  /// auto df_with_define = df.Define("newColumn", "x*x + y*y");
293  /// ~~~
294  template <typename F, typename std::enable_if<!std::is_convertible<F, std::string>::value, int>::type = 0>
295  RInterface<Proxied, DS_t> Define(std::string_view name, F expression, const ColumnNames_t &columns = {})
296  {
297  return DefineImpl<F, RDFDetail::CustomColExtraArgs::None>(name, std::move(expression), columns);
298  }
299  // clang-format on
300 
301  // clang-format off
302  ////////////////////////////////////////////////////////////////////////////
303  /// \brief Creates a custom column with a value dependent on the processing slot.
304  /// \param[in] name The name of the custom column.
305  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
306  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding the slot number).
307  /// \return the first node of the computation graph for which the new quantity is defined.
308  ///
309  /// This alternative implementation of `Define` is meant as a helper in writing thread-safe custom columns.
310  /// The expression must be a callable of signature R(unsigned int, T1, T2, ...) where `T1, T2...` are the types
311  /// of the columns that the expression takes as input. The first parameter is reserved for an unsigned integer
312  /// representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
313  /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1.
314  ///
315  /// The following two calls are equivalent, although `DefineSlot` is slightly more performant:
316  /// ~~~{.cpp}
317  /// int function(unsigned int, double, double);
318  /// df.Define("x", function, {"rdfslot_", "column1", "column2"})
319  /// df.DefineSlot("x", function, {"column1", "column2"})
320  /// ~~~
321  ///
322  /// See Define for more information.
323  template <typename F>
324  RInterface<Proxied, DS_t> DefineSlot(std::string_view name, F expression, const ColumnNames_t &columns = {})
325  {
326  return DefineImpl<F, RDFDetail::CustomColExtraArgs::Slot>(name, std::move(expression), columns);
327  }
328  // clang-format on
329 
330  // clang-format off
331  ////////////////////////////////////////////////////////////////////////////
332  /// \brief Creates a custom column with a value dependent on the processing slot and the current entry.
333  /// \param[in] name The name of the custom column.
334  /// \param[in] expression Function, lambda expression, functor class or any other callable object producing the temporary value. Returns the value that will be assigned to the custom column.
335  /// \param[in] columns Names of the columns/branches in input to the producer function (excluding slot and entry).
336  /// \return the first node of the computation graph for which the new quantity is defined.
337  ///
338  /// This alternative implementation of `Define` is meant as a helper in writing entry-specific, thread-safe custom
339  /// columns. The expression must be a callable of signature R(unsigned int, ULong64_t, T1, T2, ...) where `T1, T2...`
340  /// are the types of the columns that the expression takes as input. The first parameter is reserved for an unsigned
341  /// integer representing a "slot number". RDataFrame guarantees that different threads will invoke the expression with
342  /// different slot numbers - slot numbers will range from zero to ROOT::GetImplicitMTPoolSize()-1. The second parameter
343  /// is reserved for a `ULong64_t` representing the current entry being processed by the current thread.
344  ///
345  /// The following two `Define`s are equivalent, although `DefineSlotEntry` is slightly more performant:
346  /// ~~~{.cpp}
347  /// int function(unsigned int, ULong64_t, double, double);
348  /// Define("x", function, {"rdfslot_", "rdfentry_", "column1", "column2"})
349  /// DefineSlotEntry("x", function, {"column1", "column2"})
350  /// ~~~
351  ///
352  /// See Define for more information.
353  template <typename F>
354  RInterface<Proxied, DS_t> DefineSlotEntry(std::string_view name, F expression, const ColumnNames_t &columns = {})
355  {
356  return DefineImpl<F, RDFDetail::CustomColExtraArgs::SlotAndEntry>(name, std::move(expression), columns);
357  }
358  // clang-format on
359 
360  ////////////////////////////////////////////////////////////////////////////
361  /// \brief Creates a custom column
362  /// \param[in] name The name of the custom column.
363  /// \param[in] expression An expression in C++ which represents the temporary value
364  /// \return the first node of the computation graph for which the new quantity is defined.
365  ///
366  /// The expression is just-in-time compiled and used to produce the column entries.
367  /// It must be valid C++ syntax in which variable names are substituted with the names
368  /// of branches/columns.
369  ///
370  /// Refer to the first overload of this method for the full documentation.
371  RInterface<Proxied, DS_t> Define(std::string_view name, std::string_view expression)
372  {
373  // this check must be done before jitting lest we throw exceptions in jitted code
374  RDFInternal::CheckCustomColumn(name, fLoopManager->GetTree(), fCustomColumns.GetNames(),
375  fLoopManager->GetAliasMap(),
376  fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
377 
378  auto jittedCustomColumn =
379  std::make_shared<RDFDetail::RJittedCustomColumn>(fLoopManager, name, fLoopManager->GetNSlots());
380 
381  RDFInternal::BookDefineJit(name, expression, *fLoopManager, fDataSource, jittedCustomColumn, fCustomColumns,
382  fLoopManager->GetBranchNames());
383 
384  RDFInternal::RBookedCustomColumns newCols(fCustomColumns);
385  newCols.AddName(name);
386  newCols.AddColumn(jittedCustomColumn, name);
387 
388  fLoopManager->RegisterCustomColumn(jittedCustomColumn.get());
389 
390  RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
391 
392  return newInterface;
393  }
394 
395  ////////////////////////////////////////////////////////////////////////////
396  /// \brief Allow to refer to a column with a different name
397  /// \param[in] alias name of the column alias
398  /// \param[in] columnName of the column to be aliased
399  /// \return the first node of the computation graph for which the alias is available.
400  ///
401  /// Aliasing an alias is supported.
402  ///
403  /// ### Example usage:
404  /// ~~~{.cpp}
405  /// auto df_with_alias = df.Alias("simple_name", "very_long&complex_name!!!");
406  /// ~~~
407  RInterface<Proxied, DS_t> Alias(std::string_view alias, std::string_view columnName)
408  {
409  // The symmetry with Define is clear. We want to:
410  // - Create globally the alias and return this very node, unchanged
411  // - Make aliases accessible based on chains and not globally
412 
413  // Helper to find out if a name is a column
414  auto &dsColumnNames = fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{};
415 
416  // If the alias name is a column name, there is a problem
417  RDFInternal::CheckCustomColumn(alias, fLoopManager->GetTree(), fCustomColumns.GetNames(),
418  fLoopManager->GetAliasMap(), dsColumnNames);
419 
420  const auto validColumnName = GetValidatedColumnNames(1, {std::string(columnName)})[0];
421 
422  fLoopManager->AddColumnAlias(std::string(alias), validColumnName);
423 
424  RDFInternal::RBookedCustomColumns newCols(fCustomColumns);
425 
426  newCols.AddName(alias);
427  RInterface<Proxied, DS_t> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
428 
429  return newInterface;
430  }
431 
432  ////////////////////////////////////////////////////////////////////////////
433  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
434  /// \tparam ColumnTypes variadic list of branch/column types.
435  /// \param[in] treename The name of the output TTree.
436  /// \param[in] filename The name of the output TFile.
437  /// \param[in] columnList The list of names of the columns/branches to be written.
438  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
439  /// \return a `RDataFrame` that wraps the snapshotted dataset.
440  ///
441  /// Support for writing of nested branches is limited (although RDataFrame is able to read them) and dot ('.')
442  /// characters in input column names will be replaced by underscores ('_') in the branches produced by Snapshot.
443  /// When writing a variable size array through Snapshot, it is required that the column indicating its size is also
444  /// written out and it appears before the array in the columnList.
445  ///
446  /// ### Example invocations:
447  ///
448  /// ~~~{.cpp}
449  /// // without specifying template parameters (column types automatically deduced)
450  /// df.Snapshot("outputTree", "outputFile.root", {"x", "y"});
451  ///
452  /// // specifying template parameters ("x" is `int`, "y" is `float`)
453  /// df.Snapshot<int, float>("outputTree", "outputFile.root", {"x", "y"});
454  /// ~~~
455  ///
456  /// To book a Snapshot without triggering the event loop, one needs to set the appropriate flag in
457  /// `RSnapshotOptions`:
458  /// ~~~{.cpp}
459  /// RSnapshotOptions opts;
460  /// opts.fLazy = true;
461  /// df.Snapshot("outputTree", "outputFile.root", {"x"}, opts);
462  /// ~~~
463  template <typename... ColumnTypes>
464  RResultPtr<RInterface<RLoopManager>>
465  Snapshot(std::string_view treename, std::string_view filename, const ColumnNames_t &columnList,
466  const RSnapshotOptions &options = RSnapshotOptions())
467  {
468  return SnapshotImpl<ColumnTypes...>(treename, filename, columnList, options);
469  }
470 
471  ////////////////////////////////////////////////////////////////////////////
472  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
473  /// \param[in] treename The name of the output TTree.
474  /// \param[in] filename The name of the output TFile.
475  /// \param[in] columnList The list of names of the columns/branches to be written.
476  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
477  /// \return a `RDataFrame` that wraps the snapshotted dataset.
478  ///
479  /// This function returns a `RDataFrame` built with the output tree as a source.
480  /// The types of the columns are automatically inferred and do not need to be specified.
481  ///
482  /// See above for a more complete description and example usages.
483  RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
484  const ColumnNames_t &columnList,
485  const RSnapshotOptions &options = RSnapshotOptions())
486  {
487  // Early return: if the list of columns is empty, just return an empty RDF
488  // If we proceed, the jitted call will not compile!
489  if (columnList.empty()) {
490  auto nEntries = *this->Count();
491  auto snapshotRDF = std::make_shared<RInterface<RLoopManager>>(std::make_shared<RLoopManager>(nEntries));
492  return MakeResultPtr(snapshotRDF, *fLoopManager, nullptr);
493  }
494  auto tree = fLoopManager->GetTree();
495  const auto nsID = fLoopManager->GetID();
496  std::stringstream snapCall;
497  auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
498  RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
499  fCustomColumns, fDataSource);
500 
501  // build a string equivalent to
502  // "resPtr = (RInterface<nodetype*>*)(this)->Snapshot<Ts...>(args...)"
503  RResultPtr<RInterface<RLoopManager>> resPtr;
504  snapCall << "*reinterpret_cast<ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>>*>("
505  << RDFInternal::PrettyPrintAddr(&resPtr)
506  << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
507  << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Snapshot<";
508 
509  const auto &customCols = fCustomColumns.GetNames();
510  const auto dontConvertVector = false;
511 
512  const auto validColumnNames = GetValidatedColumnNames(columnList.size(), columnList);
513 
514  for (auto &c : validColumnNames) {
515  const auto isCustom = std::find(customCols.begin(), customCols.end(), c) != customCols.end();
516  const auto customColID = isCustom ? fCustomColumns.GetColumns().at(c)->GetID() : 0;
517  snapCall << RDFInternal::ColumnName2ColumnTypeName(c, nsID, tree, fDataSource, isCustom, dontConvertVector,
518  customColID)
519  << ", ";
520  };
521  if (!columnList.empty())
522  snapCall.seekp(-2, snapCall.cur); // remove the last ",
523  snapCall << ">(\"" << treename << "\", \"" << filename << "\", "
524  << "*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
525  << RDFInternal::PrettyPrintAddr(&columnList) << "),"
526  << "*reinterpret_cast<ROOT::RDF::RSnapshotOptions*>(" << RDFInternal::PrettyPrintAddr(&options) << "));";
527  // jit snapCall, return result
528  fLoopManager->JitDeclarations(); // some type aliases might be needed by the code jitted in the next line
529  RDFInternal::InterpreterCalc(snapCall.str(), "Snapshot");
530  return resPtr;
531  }
532 
533  // clang-format off
534  ////////////////////////////////////////////////////////////////////////////
535  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
536  /// \param[in] treename The name of the output TTree.
537  /// \param[in] filename The name of the output TFile.
538  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
539  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree
540  /// \return a `RDataFrame` that wraps the snapshotted dataset.
541  ///
542  /// This function returns a `RDataFrame` built with the output tree as a source.
543  /// The types of the columns are automatically inferred and do not need to be specified.
544  ///
545  /// See above for a more complete description and example usages.
546  RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
547  std::string_view columnNameRegexp = "",
548  const RSnapshotOptions &options = RSnapshotOptions())
549  {
550  auto selectedColumns = RDFInternal::ConvertRegexToColumns(fCustomColumns,
551  fLoopManager->GetTree(),
552  fDataSource,
553  columnNameRegexp,
554  "Snapshot");
555  return Snapshot(treename, filename, selectedColumns, options);
556  }
557  // clang-format on
558 
559  // clang-format off
560  ////////////////////////////////////////////////////////////////////////////
561  /// \brief Save selected columns to disk, in a new TTree `treename` in file `filename`.
562  /// \param[in] treename The name of the output TTree.
563  /// \param[in] filename The name of the output TFile.
564  /// \param[in] columnList The list of names of the columns/branches to be written.
565  /// \param[in] options RSnapshotOptions struct with extra options to pass to TFile and TTree.
566  /// \return a `RDataFrame` that wraps the snapshotted dataset.
567  ///
568  /// This function returns a `RDataFrame` built with the output tree as a source.
569  /// The types of the columns are automatically inferred and do not need to be specified.
570  ///
571  /// See above for a more complete description and example usages.
572  RResultPtr<RInterface<RLoopManager>> Snapshot(std::string_view treename, std::string_view filename,
573  std::initializer_list<std::string> columnList,
574  const RSnapshotOptions &options = RSnapshotOptions())
575  {
576  ColumnNames_t selectedColumns(columnList);
577  return Snapshot(treename, filename, selectedColumns, options);
578  }
579  // clang-format on
580 
581  ////////////////////////////////////////////////////////////////////////////
582  /// \brief Save selected columns in memory
583  /// \tparam ColumnTypes variadic list of branch/column types.
584  /// \param[in] columns to be cached in memory.
585  /// \return a `RDataFrame` that wraps the cached dataset.
586  ///
587  /// This action returns a new `RDataFrame` object, completely detached from
588  /// the originating `RDataFrame`. The new dataframe only contains the cached
589  /// columns and stores their content in memory for fast, zero-copy subsequent access.
590  ///
591  /// Use `Cache` if you know you will only need a subset of the (`Filter`ed) data that
592  /// fits in memory and that will be accessed many times.
593  ///
594  /// ### Example usage:
595  ///
596  /// **Types and columns specified:**
597  /// ~~~{.cpp}
598  /// auto cache_some_cols_df = df.Cache<double, MyClass, int>({"col0", "col1", "col2"});
599  /// ~~~
600  ///
601  /// **Types inferred and columns specified (this invocation relies on jitting):**
602  /// ~~~{.cpp}
603  /// auto cache_some_cols_df = df.Cache({"col0", "col1", "col2"});
604  /// ~~~
605  ///
606  /// **Types inferred and columns selected with a regexp (this invocation relies on jitting):**
607  /// ~~~{.cpp}
608  /// auto cache_all_cols_df = df.Cache(myRegexp);
609  /// ~~~
610  template <typename... ColumnTypes>
611  RInterface<RLoopManager> Cache(const ColumnNames_t &columnList)
612  {
613  auto staticSeq = std::make_index_sequence<sizeof...(ColumnTypes)>();
614  return CacheImpl<ColumnTypes...>(columnList, staticSeq);
615  }
616 
617  ////////////////////////////////////////////////////////////////////////////
618  /// \brief Save selected columns in memory
619  /// \param[in] columns to be cached in memory
620  /// \return a `RDataFrame` that wraps the cached dataset.
621  ///
622  /// See the previous overloads for more information.
623  RInterface<RLoopManager> Cache(const ColumnNames_t &columnList)
624  {
625  // Early return: if the list of columns is empty, just return an empty RDF
626  // If we proceed, the jitted call will not compile!
627  if (columnList.empty()) {
628  auto nEntries = *this->Count();
629  RInterface<RLoopManager> emptyRDF(std::make_shared<RLoopManager>(nEntries));
630  return emptyRDF;
631  }
632 
633  auto tree = fLoopManager->GetTree();
634  const auto nsID = fLoopManager->GetID();
635  std::stringstream cacheCall;
636  auto upcastNode = RDFInternal::UpcastNode(fProxiedPtr);
637  RInterface<TTraits::TakeFirstParameter_t<decltype(upcastNode)>> upcastInterface(fProxiedPtr, *fLoopManager,
638  fCustomColumns, fDataSource);
639  // build a string equivalent to
640  // "(RInterface<nodetype*>*)(this)->Cache<Ts...>(*(ColumnNames_t*)(&columnList))"
641  RInterface<RLoopManager> resRDF(std::make_shared<ROOT::Detail::RDF::RLoopManager>(0));
642  cacheCall << "*reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>*>("
643  << RDFInternal::PrettyPrintAddr(&resRDF)
644  << ") = reinterpret_cast<ROOT::RDF::RInterface<ROOT::Detail::RDF::RNodeBase>*>("
645  << RDFInternal::PrettyPrintAddr(&upcastInterface) << ")->Cache<";
646 
647  const auto &customCols = fCustomColumns.GetNames();
648  for (auto &c : columnList) {
649  const auto isCustom = std::find(customCols.begin(), customCols.end(), c) != customCols.end();
650  const auto customColID = isCustom ? fCustomColumns.GetColumns().at(c)->GetID() : 0;
651  cacheCall << RDFInternal::ColumnName2ColumnTypeName(c, nsID, tree, fDataSource, isCustom,
652  /*vector2rvec=*/true, customColID)
653  << ", ";
654  };
655  if (!columnList.empty())
656  cacheCall.seekp(-2, cacheCall.cur); // remove the last ",
657  cacheCall << ">(*reinterpret_cast<std::vector<std::string>*>(" // vector<string> should be ColumnNames_t
658  << RDFInternal::PrettyPrintAddr(&columnList) << "));";
659  // jit cacheCall, return result
660  fLoopManager->JitDeclarations(); // some type aliases might be needed by the code jitted in the next line
661  RDFInternal::InterpreterCalc(cacheCall.str(), "Cache");
662  return resRDF;
663  }
664 
665  ////////////////////////////////////////////////////////////////////////////
666  /// \brief Save selected columns in memory
667  /// \param[in] columnNameRegexp The regular expression to match the column names to be selected. The presence of a '^' and a '$' at the end of the string is implicitly assumed if they are not specified. The dialect supported is PCRE via the TPRegexp class. An empty string signals the selection of all columns.
668  /// \return a `RDataFrame` that wraps the cached dataset.
669  ///
670  /// The existing columns are matched against the regular expression. If the string provided
671  /// is empty, all columns are selected. See the previous overloads for more information.
672  RInterface<RLoopManager> Cache(std::string_view columnNameRegexp = "")
673  {
674 
675  auto selectedColumns = RDFInternal::ConvertRegexToColumns(fCustomColumns, fLoopManager->GetTree(), fDataSource,
676  columnNameRegexp, "Cache");
677  return Cache(selectedColumns);
678  }
679 
680  ////////////////////////////////////////////////////////////////////////////
681  /// \brief Save selected columns in memory
682  /// \param[in] columns to be cached in memory.
683  /// \return a `RDataFrame` that wraps the cached dataset.
684  ///
685  /// See the previous overloads for more information.
686  RInterface<RLoopManager> Cache(std::initializer_list<std::string> columnList)
687  {
688  ColumnNames_t selectedColumns(columnList);
689  return Cache(selectedColumns);
690  }
691 
692  // clang-format off
693  ////////////////////////////////////////////////////////////////////////////
694  /// \brief Creates a node that filters entries based on range: [begin, end)
695  /// \param[in] begin Initial entry number considered for this range.
696  /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
697  /// \param[in] stride Process one entry of the [begin, end) range every `stride` entries. Must be strictly greater than 0.
698  /// \return the first node of the computation graph for which the event loop is limited to a certain range of entries.
699  ///
700  /// Note that in case of previous Ranges and Filters the selected range refers to the transformed dataset.
701  /// Ranges are only available if EnableImplicitMT has _not_ been called. Multi-thread ranges are not supported.
702  ///
703  /// ### Example usage:
704  /// ~~~{.cpp}
705  /// auto d_0_30 = d.Range(0, 30); // Pick the first 30 entries
706  /// auto d_15_end = d.Range(15, 0); // Pick all entries from 15 onwards
707  /// auto d_15_end_3 = d.Range(15, 0, 3); // Stride: from event 15, pick an event every 3
708  /// ~~~
709  // clang-format on
710  RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int begin, unsigned int end, unsigned int stride = 1)
711  {
712  // check invariants
713  if (stride == 0 || (end != 0 && end < begin))
714  throw std::runtime_error("Range: stride must be strictly greater than 0 and end must be greater than begin.");
715  CheckIMTDisabled("Range");
716 
717  using Range_t = RDFDetail::RRange<Proxied>;
718  auto rangePtr = std::make_shared<Range_t>(begin, end, stride, fProxiedPtr);
719  fLoopManager->Book(rangePtr.get());
720  RInterface<RDFDetail::RRange<Proxied>> tdf_r(std::move(rangePtr), *fLoopManager, fCustomColumns, fDataSource);
721  return tdf_r;
722  }
723 
724  // clang-format off
725  ////////////////////////////////////////////////////////////////////////////
726  /// \brief Creates a node that filters entries based on range
727  /// \param[in] end Final entry number (excluded) considered for this range. 0 means that the range goes until the end of the dataset.
728  /// \return a node of the computation graph for which the range is defined.
729  ///
730  /// See the other Range overload for a detailed description.
731  // clang-format on
732  RInterface<RDFDetail::RRange<Proxied>, DS_t> Range(unsigned int end) { return Range(0, end, 1); }
733 
734  // clang-format off
735  ////////////////////////////////////////////////////////////////////////////
736  /// \brief Execute a user-defined function on each entry (*instant action*)
737  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
738  /// \param[in] columns Names of the columns/branches in input to the user function.
739  ///
740  /// The callable `f` is invoked once per entry. This is an *instant action*:
741  /// upon invocation, an event loop as well as execution of all scheduled actions
742  /// is triggered.
743  /// Users are responsible for the thread-safety of this callable when executing
744  /// with implicit multi-threading enabled (i.e. ROOT::EnableImplicitMT).
745  ///
746  /// ### Example usage:
747  /// ~~~{.cpp}
748  /// myDf.Foreach([](int i){ std::cout << i << std::endl;}, {"myIntColumn"});
749  /// ~~~
750  // clang-format on
751  template <typename F>
752  void Foreach(F f, const ColumnNames_t &columns = {})
753  {
754  using arg_types = typename TTraits::CallableTraits<decltype(f)>::arg_types_nodecay;
755  using ret_type = typename TTraits::CallableTraits<decltype(f)>::ret_type;
756  ForeachSlot(RDFInternal::AddSlotParameter<ret_type>(f, arg_types()), columns);
757  }
758 
759  // clang-format off
760  ////////////////////////////////////////////////////////////////////////////
761  /// \brief Execute a user-defined function requiring a processing slot index on each entry (*instant action*)
762  /// \param[in] f Function, lambda expression, functor class or any other callable object performing user defined calculations.
763  /// \param[in] columns Names of the columns/branches in input to the user function.
764  ///
765  /// Same as `Foreach`, but the user-defined function takes an extra
766  /// `unsigned int` as its first parameter, the *processing slot index*.
767  /// This *slot index* will be assigned a different value, `0` to `poolSize - 1`,
768  /// for each thread of execution.
769  /// This is meant as a helper in writing thread-safe `Foreach`
770  /// actions when using `RDataFrame` after `ROOT::EnableImplicitMT()`.
771  /// The user-defined processing callable is able to follow different
772  /// *streams of processing* indexed by the first parameter.
773  /// `ForeachSlot` works just as well with single-thread execution: in that
774  /// case `slot` will always be `0`.
775  ///
776  /// ### Example usage:
777  /// ~~~{.cpp}
778  /// myDf.ForeachSlot([](unsigned int s, int i){ std::cout << "Slot " << s << ": "<< i << std::endl;}, {"myIntColumn"});
779  /// ~~~
780  // clang-format on
781  template <typename F>
782  void ForeachSlot(F f, const ColumnNames_t &columns = {})
783  {
784  using ColTypes_t = TypeTraits::RemoveFirstParameter_t<typename TTraits::CallableTraits<F>::arg_types>;
785  constexpr auto nColumns = ColTypes_t::list_size;
786 
787  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
788 
789  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
790 
791  using Helper_t = RDFInternal::ForeachSlotHelper<F>;
792  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
793 
794  auto action =
795  std::make_unique<Action_t>(Helper_t(std::move(f)), validColumnNames, fProxiedPtr, std::move(newColumns));
796  fLoopManager->Book(action.get());
797 
798  fLoopManager->Run();
799  }
800 
801  // clang-format off
802  ////////////////////////////////////////////////////////////////////////////
803  /// \brief Execute a user-defined reduce operation on the values of a column.
804  /// \tparam F The type of the reduce callable. Automatically deduced.
805  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
806  /// \param[in] f A callable with signature `T(T,T)`
807  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
808  /// \return the reduced quantity wrapped in a `RResultPtr`.
809  ///
810  /// A reduction takes two values of a column and merges them into one (e.g.
811  /// by summing them, taking the maximum, etc). This action performs the
812  /// specified reduction operation on all processed column values, returning
813  /// a single value of the same type. The callable f must satisfy the general
814  /// requirements of a *processing function* besides having signature `T(T,T)`
815  /// where `T` is the type of column columnName.
816  ///
817  /// The returned reduced value of each thread (e.g. the initial value of a sum) is initialized to a
818  /// default-constructed T object. This is commonly expected to be the neutral/identity element for the specific
819  /// reduction operation `f` (e.g. 0 for a sum, 1 for a product). If a default-constructed T does not satisfy this
820  /// requirement, users should explicitly specify an initialization value for T by calling the appropriate `Reduce`
821  /// overload.
822  ///
823  /// ### Example usage:
824  /// ~~~{.cpp}
825  /// auto sumOfIntCol = d.Reduce([](int x, int y) { return x + y; }, "intCol");
826  /// ~~~
827  ///
828  /// This action is *lazy*: upon invocation of this method the calculation is
829  /// booked but not executed. See RResultPtr documentation.
830  // clang-format on
831  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
832  RResultPtr<T> Reduce(F f, std::string_view columnName = "")
833  {
834  static_assert(
835  std::is_default_constructible<T>::value,
836  "reduce object cannot be default-constructed. Please provide an initialisation value (redIdentity)");
837  return Reduce(std::move(f), columnName, T());
838  }
839 
840  ////////////////////////////////////////////////////////////////////////////
841  /// \brief Execute a user-defined reduce operation on the values of a column.
842  /// \tparam F The type of the reduce callable. Automatically deduced.
843  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
844  /// \param[in] f A callable with signature `T(T,T)`
845  /// \param[in] columnName The column to be reduced. If omitted, the first default column is used instead.
846  /// \param[in] redIdentity The reduced object of each thread is initialised to this value.
847  /// \return the reduced quantity wrapped in a `RResultPtr`.
848  ///
849  /// ### Example usage:
850  /// ~~~{.cpp}
851  /// auto sumOfIntColWithOffset = d.Reduce([](int x, int y) { return x + y; }, "intCol", 42);
852  /// ~~~
853  /// See the description of the first Reduce overload for more information.
854  template <typename F, typename T = typename TTraits::CallableTraits<F>::ret_type>
855  RResultPtr<T> Reduce(F f, std::string_view columnName, const T &redIdentity)
856  {
857  return Aggregate(f, f, columnName, redIdentity);
858  }
859 
860  ////////////////////////////////////////////////////////////////////////////
861  /// \brief Return the number of entries processed (*lazy action*)
862  /// \return the number of entries wrapped in a `RResultPtr`.
863  ///
864  /// Useful e.g. for counting the number of entries passing a certain filter (see also `Report`).
865  /// This action is *lazy*: upon invocation of this method the calculation is
866  /// booked but not executed. See RResultPtr documentation.
867  ///
868  /// ### Example usage:
869  /// ~~~{.cpp}
870  /// auto nEntriesAfterCuts = myFilteredDf.Count();
871  /// ~~~
872  ///
873  RResultPtr<ULong64_t> Count()
874  {
875  const auto nSlots = fLoopManager->GetNSlots();
876  auto cSPtr = std::make_shared<ULong64_t>(0);
877  using Helper_t = RDFInternal::CountHelper;
878  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
879  auto action =
880  std::make_unique<Action_t>(Helper_t(cSPtr, nSlots), ColumnNames_t({}), fProxiedPtr, std::move(fCustomColumns));
881  fLoopManager->Book(action.get());
882  return MakeResultPtr(cSPtr, *fLoopManager, std::move(action));
883  }
884 
885  ////////////////////////////////////////////////////////////////////////////
886  /// \brief Return a collection of values of a column (*lazy action*, returns a std::vector by default)
887  /// \tparam T The type of the column.
888  /// \tparam COLL The type of collection used to store the values.
889  /// \param[in] column The name of the column to collect the values of.
890  /// \return the content of the selected column wrapped in a `RResultPtr`.
891  ///
892  /// The collection type to be specified for C-style array columns is `RVec<T>`:
893  /// in this case the returned collection is a `std::vector<RVec<T>>`.
894  /// ### Example usage:
895  /// ~~~{.cpp}
896  /// // In this case intCol is a std::vector<int>
897  /// auto intCol = rdf.Take<int>("integerColumn");
898  /// // Same content as above but in this case taken as a RVec<int>
899  /// auto intColAsRVec = rdf.Take<int, RVec<int>>("integerColumn");
900  /// // In this case intCol is a std::vector<RVec<int>>, a collection of collections
901  /// auto cArrayIntCol = rdf.Take<RVec<int>>("cArrayInt");
902  /// ~~~
903  /// This action is *lazy*: upon invocation of this method the calculation is
904  /// booked but not executed. See RResultPtr documentation.
905  template <typename T, typename COLL = std::vector<T>>
906  RResultPtr<COLL> Take(std::string_view column = "")
907  {
908  const auto columns = column.empty() ? ColumnNames_t() : ColumnNames_t({std::string(column)});
909 
910  const auto validColumnNames = GetValidatedColumnNames(1, columns);
911 
912  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<1>(), TTraits::TypeList<T>());
913 
914  using Helper_t = RDFInternal::TakeHelper<T, T, COLL>;
915  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
916  auto valuesPtr = std::make_shared<COLL>();
917  const auto nSlots = fLoopManager->GetNSlots();
918 
919  auto action =
920  std::make_unique<Action_t>(Helper_t(valuesPtr, nSlots), validColumnNames, fProxiedPtr, std::move(newColumns));
921  fLoopManager->Book(action.get());
922  return MakeResultPtr(valuesPtr, *fLoopManager, std::move(action));
923  }
924 
925  ////////////////////////////////////////////////////////////////////////////
926  /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
927  /// \tparam V The type of the column used to fill the histogram.
928  /// \param[in] model The returned histogram will be constructed using this as a model.
929  /// \param[in] vName The name of the column that will fill the histogram.
930  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
931  ///
932  /// Columns can be of a container type (e.g. `std::vector<double>`), in which case the histogram
933  /// is filled with each one of the elements of the container. In case multiple columns of container type
934  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
935  /// possibly different lengths between events).
936  /// This action is *lazy*: upon invocation of this method the calculation is
937  /// booked but not executed. See RResultPtr documentation.
938  ///
939  /// ### Example usage:
940  /// ~~~{.cpp}
941  /// // Deduce column type (this invocation needs jitting internally)
942  /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
943  /// // Explicit column type
944  /// auto myHist2 = myDf.Histo1D<float>({"histName", "histTitle", 64u, 0., 128.}, "myColumn");
945  /// ~~~
946  ///
947  template <typename V = RDFDetail::RInferredType>
948  RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.}, std::string_view vName = "")
949  {
950  const auto userColumns = vName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(vName)});
951 
952  const auto validatedColumns = GetValidatedColumnNames(1, userColumns);
953 
954  std::shared_ptr<::TH1D> h(nullptr);
955  {
956  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
957  h = model.GetHistogram();
958  h->SetDirectory(nullptr);
959  }
960 
961  if (h->GetXaxis()->GetXmax() == h->GetXaxis()->GetXmin())
962  RDFInternal::HistoUtils<::TH1D>::SetCanExtendAllAxes(*h);
963  return CreateAction<RDFInternal::ActionTags::Histo1D, V>(validatedColumns, h);
964  }
965 
966  ////////////////////////////////////////////////////////////////////////////
967  /// \brief Fill and return a one-dimensional histogram with the values of a column (*lazy action*)
968  /// \tparam V The type of the column used to fill the histogram.
969  /// \param[in] vName The name of the column that will fill the histogram.
970  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
971  ///
972  /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
973  /// The "name" and "title" strings are built starting from the input column name.
974  /// See the description of the first Histo1D overload for more details.
975  ///
976  /// ### Example usage:
977  /// ~~~{.cpp}
978  /// // Deduce column type (this invocation needs jitting internally)
979  /// auto myHist1 = myDf.Histo1D("myColumn");
980  /// // Explicit column type
981  /// auto myHist2 = myDf.Histo1D<float>("myColumn");
982  /// ~~~
983  ///
984  template <typename V = RDFDetail::RInferredType>
985  RResultPtr<::TH1D> Histo1D(std::string_view vName)
986  {
987  const auto h_name = std::string(vName);
988  const auto h_title = h_name + ";" + h_name + ";count";
989  return Histo1D<V>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName);
990  }
991 
992  ////////////////////////////////////////////////////////////////////////////
993  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
994  /// \tparam V The type of the column used to fill the histogram.
995  /// \tparam W The type of the column used as weights.
996  /// \param[in] model The returned histogram will be constructed using this as a model.
997  /// \param[in] vName The name of the column that will fill the histogram.
998  /// \param[in] wName The name of the column that will provide the weights.
999  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1000  ///
1001  /// See the description of the first Histo1D overload for more details.
1002  ///
1003  /// ### Example usage:
1004  /// ~~~{.cpp}
1005  /// // Deduce column type (this invocation needs jitting internally)
1006  /// auto myHist1 = myDf.Histo1D({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1007  /// // Explicit column type
1008  /// auto myHist2 = myDf.Histo1D<float, int>({"histName", "histTitle", 64u, 0., 128.}, "myValue", "myweight");
1009  /// ~~~
1010  ///
1011  template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1012  RResultPtr<::TH1D> Histo1D(const TH1DModel &model, std::string_view vName, std::string_view wName)
1013  {
1014  const std::vector<std::string_view> columnViews = {vName, wName};
1015  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1016  ? ColumnNames_t()
1017  : ColumnNames_t(columnViews.begin(), columnViews.end());
1018  std::shared_ptr<::TH1D> h(nullptr);
1019  {
1020  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1021  h = model.GetHistogram();
1022  }
1023  return CreateAction<RDFInternal::ActionTags::Histo1D, V, W>(userColumns, h);
1024  }
1025 
1026  ////////////////////////////////////////////////////////////////////////////
1027  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1028  /// \tparam V The type of the column used to fill the histogram.
1029  /// \tparam W The type of the column used as weights.
1030  /// \param[in] vName The name of the column that will fill the histogram.
1031  /// \param[in] wName The name of the column that will provide the weights.
1032  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1033  ///
1034  /// This overload uses a default model histogram TH1D(name, title, 128u, 0., 0.).
1035  /// The "name" and "title" strings are built starting from the input column names.
1036  /// See the description of the first Histo1D overload for more details.
1037  ///
1038  /// ### Example usage:
1039  /// ~~~{.cpp}
1040  /// // Deduce column types (this invocation needs jitting internally)
1041  /// auto myHist1 = myDf.Histo1D("myValue", "myweight");
1042  /// // Explicit column types
1043  /// auto myHist2 = myDf.Histo1D<float, int>("myValue", "myweight");
1044  /// ~~~
1045  ///
1046  template <typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1047  RResultPtr<::TH1D> Histo1D(std::string_view vName, std::string_view wName)
1048  {
1049  // We build name and title based on the value and weight column names
1050  std::string str_vName{vName};
1051  std::string str_wName{wName};
1052  const auto h_name = str_vName + "_weighted_" + str_wName;
1053  const auto h_title = str_vName + ", weights: " + str_wName + ";" + str_vName + ";count * " + str_wName;
1054  return Histo1D<V, W>({h_name.c_str(), h_title.c_str(), 128u, 0., 0.}, vName, wName);
1055  }
1056 
1057  ////////////////////////////////////////////////////////////////////////////
1058  /// \brief Fill and return a one-dimensional histogram with the weighted values of a column (*lazy action*)
1059  /// \tparam V The type of the column used to fill the histogram.
1060  /// \tparam W The type of the column used as weights.
1061  /// \param[in] model The returned histogram will be constructed using this as a model.
1062  /// \return the monodimensional histogram wrapped in a `RResultPtr`.
1063  ///
1064  /// This overload will use the first two default columns as column names.
1065  /// See the description of the first Histo1D overload for more details.
1066  template <typename V, typename W>
1067  RResultPtr<::TH1D> Histo1D(const TH1DModel &model = {"", "", 128u, 0., 0.})
1068  {
1069  return Histo1D<V, W>(model, "", "");
1070  }
1071 
1072  ////////////////////////////////////////////////////////////////////////////
1073  /// \brief Fill and return a two-dimensional histogram (*lazy action*)
1074  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1075  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1076  /// \param[in] model The returned histogram will be constructed using this as a model.
1077  /// \param[in] v1Name The name of the column that will fill the x axis.
1078  /// \param[in] v2Name The name of the column that will fill the y axis.
1079  /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1080  ///
1081  /// Columns can be of a container type (e.g. std::vector<double>), in which case the histogram
1082  /// is filled with each one of the elements of the container. In case multiple columns of container type
1083  /// are provided (e.g. values and weights) they must have the same length for each one of the events (but
1084  /// possibly different lengths between events).
1085  /// This action is *lazy*: upon invocation of this method the calculation is
1086  /// booked but not executed. See RResultPtr documentation.
1087  ///
1088  /// ### Example usage:
1089  /// ~~~{.cpp}
1090  /// // Deduce column types (this invocation needs jitting internally)
1091  /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1092  /// // Explicit column types
1093  /// auto myHist2 = myDf.Histo2D<float, float>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY");
1094  /// ~~~
1095  ///
1096  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1097  RResultPtr<::TH2D> Histo2D(const TH2DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1098  {
1099  std::shared_ptr<::TH2D> h(nullptr);
1100  {
1101  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1102  h = model.GetHistogram();
1103  }
1104  if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1105  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1106  }
1107  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1108  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1109  ? ColumnNames_t()
1110  : ColumnNames_t(columnViews.begin(), columnViews.end());
1111  return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2>(userColumns, h);
1112  }
1113 
1114  ////////////////////////////////////////////////////////////////////////////
1115  /// \brief Fill and return a weighted two-dimensional histogram (*lazy action*)
1116  /// \tparam V1 The type of the column used to fill the x axis of the histogram.
1117  /// \tparam V2 The type of the column used to fill the y axis of the histogram.
1118  /// \tparam W The type of the column used for the weights of the histogram.
1119  /// \param[in] model The returned histogram will be constructed using this as a model.
1120  /// \param[in] v1Name The name of the column that will fill the x axis.
1121  /// \param[in] v2Name The name of the column that will fill the y axis.
1122  /// \param[in] wName The name of the column that will provide the weights.
1123  /// \return the bidimensional histogram wrapped in a `RResultPtr`.
1124  ///
1125  /// This action is *lazy*: upon invocation of this method the calculation is
1126  /// booked but not executed. See RResultPtr documentation.
1127  /// The user gives up ownership of the model histogram.
1128  ///
1129  /// ### Example usage:
1130  /// ~~~{.cpp}
1131  /// // Deduce column types (this invocation needs jitting internally)
1132  /// auto myHist1 = myDf.Histo2D({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1133  /// // Explicit column types
1134  /// auto myHist2 = myDf.Histo2D<float, float, double>({"histName", "histTitle", 64u, 0., 128., 32u, -4., 4.}, "myValueX", "myValueY", "myWeight");
1135  /// ~~~
1136  ///
1137  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1138  typename W = RDFDetail::RInferredType>
1139  RResultPtr<::TH2D>
1140  Histo2D(const TH2DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
1141  {
1142  std::shared_ptr<::TH2D> h(nullptr);
1143  {
1144  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1145  h = model.GetHistogram();
1146  }
1147  if (!RDFInternal::HistoUtils<::TH2D>::HasAxisLimits(*h)) {
1148  throw std::runtime_error("2D histograms with no axes limits are not supported yet.");
1149  }
1150  const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1151  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1152  ? ColumnNames_t()
1153  : ColumnNames_t(columnViews.begin(), columnViews.end());
1154  return CreateAction<RDFInternal::ActionTags::Histo2D, V1, V2, W>(userColumns, h);
1155  }
1156 
1157  template <typename V1, typename V2, typename W>
1158  RResultPtr<::TH2D> Histo2D(const TH2DModel &model)
1159  {
1160  return Histo2D<V1, V2, W>(model, "", "", "");
1161  }
1162 
1163  ////////////////////////////////////////////////////////////////////////////
1164  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1165  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1166  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1167  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1168  /// \param[in] model The returned histogram will be constructed using this as a model.
1169  /// \param[in] v1Name The name of the column that will fill the x axis.
1170  /// \param[in] v2Name The name of the column that will fill the y axis.
1171  /// \param[in] v3Name The name of the column that will fill the z axis.
1172  /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1173  ///
1174  /// This action is *lazy*: upon invocation of this method the calculation is
1175  /// booked but not executed. See RResultPtr documentation.
1176  ///
1177  /// ### Example usage:
1178  /// ~~~{.cpp}
1179  /// // Deduce column types (this invocation needs jitting internally)
1180  /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1181  /// "myValueX", "myValueY", "myValueZ");
1182  /// // Explicit column types
1183  /// auto myHist2 = myDf.Histo3D<double, double, float>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1184  /// "myValueX", "myValueY", "myValueZ");
1185  /// ~~~
1186  ///
1187  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1188  typename V3 = RDFDetail::RInferredType>
1189  RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name = "", std::string_view v2Name = "",
1190  std::string_view v3Name = "")
1191  {
1192  std::shared_ptr<::TH3D> h(nullptr);
1193  {
1194  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1195  h = model.GetHistogram();
1196  }
1197  if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1198  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1199  }
1200  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1201  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1202  ? ColumnNames_t()
1203  : ColumnNames_t(columnViews.begin(), columnViews.end());
1204  return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3>(userColumns, h);
1205  }
1206 
1207  ////////////////////////////////////////////////////////////////////////////
1208  /// \brief Fill and return a three-dimensional histogram (*lazy action*)
1209  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1210  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1211  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1212  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1213  /// \param[in] model The returned histogram will be constructed using this as a model.
1214  /// \param[in] v1Name The name of the column that will fill the x axis.
1215  /// \param[in] v2Name The name of the column that will fill the y axis.
1216  /// \param[in] v3Name The name of the column that will fill the z axis.
1217  /// \param[in] wName The name of the column that will provide the weights.
1218  /// \return the tridimensional histogram wrapped in a `RResultPtr`.
1219  ///
1220  /// This action is *lazy*: upon invocation of this method the calculation is
1221  /// booked but not executed. See RResultPtr documentation.
1222  ///
1223  /// ### Example usage:
1224  /// ~~~{.cpp}
1225  /// // Deduce column types (this invocation needs jitting internally)
1226  /// auto myHist1 = myDf.Histo3D({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1227  /// "myValueX", "myValueY", "myValueZ", "myWeight");
1228  /// // Explicit column types
1229  /// using d_t = double;
1230  /// auto myHist2 = myDf.Histo3D<d_t, d_t, float, d_t>({"name", "title", 64u, 0., 128., 32u, -4., 4., 8u, -2., 2.},
1231  /// "myValueX", "myValueY", "myValueZ", "myWeight");
1232  /// ~~~
1233  ///
1234  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1235  typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1236  RResultPtr<::TH3D> Histo3D(const TH3DModel &model, std::string_view v1Name, std::string_view v2Name,
1237  std::string_view v3Name, std::string_view wName)
1238  {
1239  std::shared_ptr<::TH3D> h(nullptr);
1240  {
1241  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1242  h = model.GetHistogram();
1243  }
1244  if (!RDFInternal::HistoUtils<::TH3D>::HasAxisLimits(*h)) {
1245  throw std::runtime_error("3D histograms with no axes limits are not supported yet.");
1246  }
1247  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1248  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1249  ? ColumnNames_t()
1250  : ColumnNames_t(columnViews.begin(), columnViews.end());
1251  return CreateAction<RDFInternal::ActionTags::Histo3D, V1, V2, V3, W>(userColumns, h);
1252  }
1253 
1254  template <typename V1, typename V2, typename V3, typename W>
1255  RResultPtr<::TH3D> Histo3D(const TH3DModel &model)
1256  {
1257  return Histo3D<V1, V2, V3, W>(model, "", "", "", "");
1258  }
1259 
1260  ////////////////////////////////////////////////////////////////////////////
1261  /// \brief Fill and return a graph (*lazy action*)
1262  /// \tparam V1 The type of the column used to fill the x axis of the graph.
1263  /// \tparam V2 The type of the column used to fill the y axis of the graph.
1264  /// \param[in] v1Name The name of the column that will fill the x axis.
1265  /// \param[in] v2Name The name of the column that will fill the y axis.
1266  /// \return the graph wrapped in a `RResultPtr`.
1267  ///
1268  /// Columns can be of a container type (e.g. std::vector<double>), in which case the graph
1269  /// is filled with each one of the elements of the container.
1270  /// If Multithreading is enabled, the order in which points are inserted is undefined.
1271  /// If the Graph has to be drawn, it is suggested to the user to sort it on the x before printing.
1272  /// A name and a title to the graph is given based on the input column names.
1273  ///
1274  /// This action is *lazy*: upon invocation of this method the calculation is
1275  /// booked but not executed. See RResultPtr documentation.
1276  ///
1277  /// ### Example usage:
1278  /// ~~~{.cpp}
1279  /// // Deduce column types (this invocation needs jitting internally)
1280  /// auto myGraph1 = myDf.Graph("xValues", "yValues");
1281  /// // Explicit column types
1282  /// auto myGraph2 = myDf.Graph<int, float>("xValues", "yValues");
1283  /// ~~~
1284  ///
1285  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1286  RResultPtr<::TGraph> Graph(std::string_view v1Name = "", std::string_view v2Name = "")
1287  {
1288  auto graph = std::make_shared<::TGraph>();
1289  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1290  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1291  ? ColumnNames_t()
1292  : ColumnNames_t(columnViews.begin(), columnViews.end());
1293 
1294  const auto validatedColumns = GetValidatedColumnNames(2, userColumns);
1295 
1296  // We build a default name and title based on the input columns
1297  if (!(validatedColumns[0].empty() && validatedColumns[1].empty())) {
1298  const auto g_name = std::string(v1Name) + "_vs_" + std::string(v2Name);
1299  const auto g_title = std::string(v1Name) + " vs " + std::string(v2Name);
1300  graph->SetNameTitle(g_name.c_str(), g_title.c_str());
1301  graph->GetXaxis()->SetTitle(std::string(v1Name).c_str());
1302  graph->GetYaxis()->SetTitle(std::string(v2Name).c_str());
1303  }
1304 
1305  return CreateAction<RDFInternal::ActionTags::Graph, V1, V2>(validatedColumns, graph);
1306  }
1307 
1308  ////////////////////////////////////////////////////////////////////////////
1309  /// \brief Fill and return a one-dimensional profile (*lazy action*)
1310  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1311  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1312  /// \param[in] model The model to be considered to build the new return value.
1313  /// \param[in] v1Name The name of the column that will fill the x axis.
1314  /// \param[in] v2Name The name of the column that will fill the y axis.
1315  /// \return the monodimensional profile wrapped in a `RResultPtr`.
1316  ///
1317  /// This action is *lazy*: upon invocation of this method the calculation is
1318  /// booked but not executed. See RResultPtr documentation.
1319  ///
1320  /// ### Example usage:
1321  /// ~~~{.cpp}
1322  /// // Deduce column types (this invocation needs jitting internally)
1323  /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1324  /// // Explicit column types
1325  /// auto myProf2 = myDf.Graph<int, float>({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues");
1326  /// ~~~
1327  ///
1328  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType>
1329  RResultPtr<::TProfile>
1330  Profile1D(const TProfile1DModel &model, std::string_view v1Name = "", std::string_view v2Name = "")
1331  {
1332  std::shared_ptr<::TProfile> h(nullptr);
1333  {
1334  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1335  h = model.GetProfile();
1336  }
1337 
1338  if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1339  throw std::runtime_error("Profiles with no axes limits are not supported yet.");
1340  }
1341  const std::vector<std::string_view> columnViews = {v1Name, v2Name};
1342  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1343  ? ColumnNames_t()
1344  : ColumnNames_t(columnViews.begin(), columnViews.end());
1345  return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2>(userColumns, h);
1346  }
1347 
1348  ////////////////////////////////////////////////////////////////////////////
1349  /// \brief Fill and return a one-dimensional profile (*lazy action*)
1350  /// \tparam V1 The type of the column the values of which are used to fill the profile. Inferred if not present.
1351  /// \tparam V2 The type of the column the values of which are used to fill the profile. Inferred if not present.
1352  /// \tparam W The type of the column the weights of which are used to fill the profile. Inferred if not present.
1353  /// \param[in] model The model to be considered to build the new return value.
1354  /// \param[in] v1Name The name of the column that will fill the x axis.
1355  /// \param[in] v2Name The name of the column that will fill the y axis.
1356  /// \param[in] wName The name of the column that will provide the weights.
1357  /// \return the monodimensional profile wrapped in a `RResultPtr`.
1358  ///
1359  /// This action is *lazy*: upon invocation of this method the calculation is
1360  /// booked but not executed. See RResultPtr documentation.
1361  ///
1362  /// ### Example usage:
1363  /// ~~~{.cpp}
1364  /// // Deduce column types (this invocation needs jitting internally)
1365  /// auto myProf1 = myDf.Profile1D({"profName", "profTitle", 64u, -4., 4.}, "xValues", "yValues", "weight");
1366  /// // Explicit column types
1367  /// auto myProf2 = myDf.Profile1D<int, float, double>({"profName", "profTitle", 64u, -4., 4.},
1368  /// "xValues", "yValues", "weight");
1369  /// ~~~
1370  ///
1371  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1372  typename W = RDFDetail::RInferredType>
1373  RResultPtr<::TProfile>
1374  Profile1D(const TProfile1DModel &model, std::string_view v1Name, std::string_view v2Name, std::string_view wName)
1375  {
1376  std::shared_ptr<::TProfile> h(nullptr);
1377  {
1378  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1379  h = model.GetProfile();
1380  }
1381 
1382  if (!RDFInternal::HistoUtils<::TProfile>::HasAxisLimits(*h)) {
1383  throw std::runtime_error("Profile histograms with no axes limits are not supported yet.");
1384  }
1385  const std::vector<std::string_view> columnViews = {v1Name, v2Name, wName};
1386  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1387  ? ColumnNames_t()
1388  : ColumnNames_t(columnViews.begin(), columnViews.end());
1389  return CreateAction<RDFInternal::ActionTags::Profile1D, V1, V2, W>(userColumns, h);
1390  }
1391 
1392  template <typename V1, typename V2, typename W>
1393  RResultPtr<::TProfile> Profile1D(const TProfile1DModel &model)
1394  {
1395  return Profile1D<V1, V2, W>(model, "", "", "");
1396  }
1397 
1398  ////////////////////////////////////////////////////////////////////////////
1399  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1400  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1401  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1402  /// \tparam V2 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1403  /// \param[in] model The returned profile will be constructed using this as a model.
1404  /// \param[in] v1Name The name of the column that will fill the x axis.
1405  /// \param[in] v2Name The name of the column that will fill the y axis.
1406  /// \param[in] v3Name The name of the column that will fill the z axis.
1407  /// \return the bidimensional profile wrapped in a `RResultPtr`.
1408  ///
1409  /// This action is *lazy*: upon invocation of this method the calculation is
1410  /// booked but not executed. See RResultPtr documentation.
1411  ///
1412  /// ### Example usage:
1413  /// ~~~{.cpp}
1414  /// // Deduce column types (this invocation needs jitting internally)
1415  /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1416  /// "xValues", "yValues", "zValues");
1417  /// // Explicit column types
1418  /// auto myProf2 = myDf.Profile2D<int, float, double>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1419  /// "xValues", "yValues", "zValues");
1420  /// ~~~
1421  ///
1422  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1423  typename V3 = RDFDetail::RInferredType>
1424  RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name = "",
1425  std::string_view v2Name = "", std::string_view v3Name = "")
1426  {
1427  std::shared_ptr<::TProfile2D> h(nullptr);
1428  {
1429  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1430  h = model.GetProfile();
1431  }
1432 
1433  if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1434  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1435  }
1436  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name};
1437  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1438  ? ColumnNames_t()
1439  : ColumnNames_t(columnViews.begin(), columnViews.end());
1440  return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3>(userColumns, h);
1441  }
1442 
1443  ////////////////////////////////////////////////////////////////////////////
1444  /// \brief Fill and return a two-dimensional profile (*lazy action*)
1445  /// \tparam V1 The type of the column used to fill the x axis of the histogram. Inferred if not present.
1446  /// \tparam V2 The type of the column used to fill the y axis of the histogram. Inferred if not present.
1447  /// \tparam V3 The type of the column used to fill the z axis of the histogram. Inferred if not present.
1448  /// \tparam W The type of the column used for the weights of the histogram. Inferred if not present.
1449  /// \param[in] model The returned histogram will be constructed using this as a model.
1450  /// \param[in] v1Name The name of the column that will fill the x axis.
1451  /// \param[in] v2Name The name of the column that will fill the y axis.
1452  /// \param[in] v3Name The name of the column that will fill the z axis.
1453  /// \param[in] wName The name of the column that will provide the weights.
1454  /// \return the bidimensional profile wrapped in a `RResultPtr`.
1455  ///
1456  /// This action is *lazy*: upon invocation of this method the calculation is
1457  /// booked but not executed. See RResultPtr documentation.
1458  ///
1459  /// ### Example usage:
1460  /// ~~~{.cpp}
1461  /// // Deduce column types (this invocation needs jitting internally)
1462  /// auto myProf1 = myDf.Profile2D({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1463  /// "xValues", "yValues", "zValues", "weight");
1464  /// // Explicit column types
1465  /// auto myProf2 = myDf.Profile2D<int, float, double, int>({"profName", "profTitle", 40, -4, 4, 40, -4, 4, 0, 20},
1466  /// "xValues", "yValues", "zValues", "weight");
1467  /// ~~~
1468  ///
1469  template <typename V1 = RDFDetail::RInferredType, typename V2 = RDFDetail::RInferredType,
1470  typename V3 = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1471  RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model, std::string_view v1Name, std::string_view v2Name,
1472  std::string_view v3Name, std::string_view wName)
1473  {
1474  std::shared_ptr<::TProfile2D> h(nullptr);
1475  {
1476  ROOT::Internal::RDF::RIgnoreErrorLevelRAII iel(kError);
1477  h = model.GetProfile();
1478  }
1479 
1480  if (!RDFInternal::HistoUtils<::TProfile2D>::HasAxisLimits(*h)) {
1481  throw std::runtime_error("2D profiles with no axes limits are not supported yet.");
1482  }
1483  const std::vector<std::string_view> columnViews = {v1Name, v2Name, v3Name, wName};
1484  const auto userColumns = RDFInternal::AtLeastOneEmptyString(columnViews)
1485  ? ColumnNames_t()
1486  : ColumnNames_t(columnViews.begin(), columnViews.end());
1487  return CreateAction<RDFInternal::ActionTags::Profile2D, V1, V2, V3, W>(userColumns, h);
1488  }
1489 
1490  template <typename V1, typename V2, typename V3, typename W>
1491  RResultPtr<::TProfile2D> Profile2D(const TProfile2DModel &model)
1492  {
1493  return Profile2D<V1, V2, V3, W>(model, "", "", "", "");
1494  }
1495 
1496  ////////////////////////////////////////////////////////////////////////////
1497  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1498  ///
1499  /// T must be a type that provides a copy- or move-constructor and a `T::Fill` method that takes as many arguments
1500  /// as the column names pass as columnList. The arguments of `T::Fill` must have type equal to the one of the
1501  /// specified columns (these types are passed as template parameters to this method).
1502  /// \tparam FirstColumn The first type of the column the values of which are used to fill the object.
1503  /// \tparam OtherColumns A list of the other types of the columns the values of which are used to fill the object.
1504  /// \tparam T The type of the object to fill. Automatically deduced.
1505  /// \param[in] model The model to be considered to build the new return value.
1506  /// \param[in] columnList A list containing the names of the columns that will be passed when calling `Fill`
1507  /// \return the filled object wrapped in a `RResultPtr`.
1508  ///
1509  /// The user gives up ownership of the model object.
1510  /// The list of column names to be used for filling must always be specified.
1511  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed.
1512  /// See RResultPtr documentation.
1513  ///
1514  /// ### Example usage:
1515  /// ~~~{.cpp}
1516  /// MyClass obj;
1517  /// auto myFilledObj = myDf.Fill<float>(obj, {"col0", "col1"});
1518  /// ~~~
1519  ///
1520  template <typename FirstColumn, typename... OtherColumns, typename T> // need FirstColumn to disambiguate overloads
1521  RResultPtr<T> Fill(T &&model, const ColumnNames_t &columnList)
1522  {
1523  auto h = std::make_shared<T>(std::forward<T>(model));
1524  if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1525  throw std::runtime_error("The absence of axes limits is not supported yet.");
1526  }
1527  return CreateAction<RDFInternal::ActionTags::Fill, FirstColumn, OtherColumns...>(columnList, h);
1528  }
1529 
1530  ////////////////////////////////////////////////////////////////////////////
1531  /// \brief Return an object of type T on which `T::Fill` will be called once per event (*lazy action*)
1532  ///
1533  /// This overload infers the types of the columns specified in columnList at runtime and just-in-time compiles the
1534  /// method with these types. See previous overload for more information.
1535  /// \tparam T The type of the object to fill. Automatically deduced.
1536  /// \param[in] model The model to be considered to build the new return value.
1537  /// \param[in] columnList The name of the columns read to fill the object.
1538  /// \return the filled object wrapped in a `RResultPtr`.
1539  ///
1540  /// This overload of `Fill` infers the type of the specified columns at runtime and just-in-time compiles the
1541  /// previous overload. Check the previous overload for more details on `Fill`.
1542  ///
1543  /// ### Example usage:
1544  /// ~~~{.cpp}
1545  /// MyClass obj;
1546  /// auto myFilledObj = myDf.Fill(obj, {"col0", "col1"});
1547  /// ~~~
1548  ///
1549  template <typename T>
1550  RResultPtr<T> Fill(T &&model, const ColumnNames_t &bl)
1551  {
1552  auto h = std::make_shared<T>(std::forward<T>(model));
1553  if (!RDFInternal::HistoUtils<T>::HasAxisLimits(*h)) {
1554  throw std::runtime_error("The absence of axes limits is not supported yet.");
1555  }
1556  return CreateAction<RDFInternal::ActionTags::Fill, RDFDetail::RInferredType>(bl, h, bl.size());
1557  }
1558 
1559  ////////////////////////////////////////////////////////////////////////////
1560  /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1561  ///
1562  /// \tparam V The type of the value column
1563  /// \param[in] value The name of the column with the values to fill the statistics with.
1564  /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1565  ///
1566  /// ### Example usage:
1567  /// ~~~{.cpp}
1568  /// // Deduce column type (this invocation needs jitting internally)
1569  /// auto stats0 = myDf.Stats("values");
1570  /// // Explicit column type
1571  /// auto stats1 = myDf.Stats<float>("values");
1572  /// ~~~
1573  ///
1574  template<typename V = RDFDetail::RInferredType>
1575  RResultPtr<TStatistic> Stats(std::string_view value = "")
1576  {
1577  ColumnNames_t columns;
1578  if (!value.empty()) {
1579  columns.emplace_back(std::string(value));
1580  }
1581  const auto validColumnNames = GetValidatedColumnNames(1, columns);
1582  if (std::is_same<V, RDFDetail::RInferredType>::value) {
1583  return Fill(TStatistic(), validColumnNames);
1584  }
1585  else {
1586  return Fill<V>(TStatistic(), validColumnNames);
1587  }
1588  }
1589 
1590  ////////////////////////////////////////////////////////////////////////////
1591  /// \brief Return a TStatistic object, filled once per event (*lazy action*)
1592  ///
1593  /// \tparam V The type of the value column
1594  /// \tparam W The type of the weight column
1595  /// \param[in] value The name of the column with the values to fill the statistics with.
1596  /// \param[in] weight The name of the column with the weights to fill the statistics with.
1597  /// \return the filled TStatistic object wrapped in a `RResultPtr`.
1598  ///
1599  /// ### Example usage:
1600  /// ~~~{.cpp}
1601  /// // Deduce column types (this invocation needs jitting internally)
1602  /// auto stats0 = myDf.Stats("values", "weights");
1603  /// // Explicit column types
1604  /// auto stats1 = myDf.Stats<int, float>("values", "weights");
1605  /// ~~~
1606  ///
1607  template<typename V = RDFDetail::RInferredType, typename W = RDFDetail::RInferredType>
1608  RResultPtr<TStatistic> Stats(std::string_view value, std::string_view weight)
1609  {
1610  ColumnNames_t columns {std::string(value), std::string(weight)};
1611  constexpr auto vIsInferred = std::is_same<V, RDFDetail::RInferredType>::value;
1612  constexpr auto wIsInferred = std::is_same<W, RDFDetail::RInferredType>::value;
1613  const auto validColumnNames = GetValidatedColumnNames(2, columns);
1614  // We have 3 cases:
1615  // 1. Both types are inferred: we use Fill and let the jit kick in.
1616  // 2. One of the two types is explicit and the other one is inferred: the case is not supported.
1617  // 3. Both types are explicit: we invoke the fully compiled Fill method.
1618  if (vIsInferred && wIsInferred) {
1619  return Fill(TStatistic(), validColumnNames);
1620  } else if (vIsInferred != wIsInferred) {
1621  std::string error("The ");
1622  error += vIsInferred ? "value " : "weight ";
1623  error += "column type is explicit, while the ";
1624  error += vIsInferred ? "weight " : "value ";
1625  error += " is specified to be inferred. This case is not supported: please specify both types or none.";
1626  throw std::runtime_error(error);
1627  } else {
1628  return Fill<V, W>(TStatistic(), validColumnNames);
1629  }
1630  }
1631 
1632  ////////////////////////////////////////////////////////////////////////////
1633  /// \brief Return the minimum of processed column values (*lazy action*)
1634  /// \tparam T The type of the branch/column.
1635  /// \param[in] columnName The name of the branch/column to be treated.
1636  /// \return the minimum value of the selected column wrapped in a `RResultPtr`.
1637  ///
1638  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1639  /// template specialization of this method.
1640  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1641  ///
1642  /// This action is *lazy*: upon invocation of this method the calculation is
1643  /// booked but not executed. See RResultPtr documentation.
1644  ///
1645  /// ### Example usage:
1646  /// ~~~{.cpp}
1647  /// // Deduce column type (this invocation needs jitting internally)
1648  /// auto minVal0 = myDf.Min("values");
1649  /// // Explicit column type
1650  /// auto minVal1 = myDf.Min<double>("values");
1651  /// ~~~
1652  ///
1653  template <typename T = RDFDetail::RInferredType>
1654  RResultPtr<RDFDetail::MinReturnType_t<T>> Min(std::string_view columnName = "")
1655  {
1656  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1657  using RetType_t = RDFDetail::MinReturnType_t<T>;
1658  auto minV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::max());
1659  return CreateAction<RDFInternal::ActionTags::Min, T>(userColumns, minV);
1660  }
1661 
1662  ////////////////////////////////////////////////////////////////////////////
1663  /// \brief Return the maximum of processed column values (*lazy action*)
1664  /// \tparam T The type of the branch/column.
1665  /// \param[in] columnName The name of the branch/column to be treated.
1666  /// \return the maximum value of the selected column wrapped in a `RResultPtr`.
1667  ///
1668  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1669  /// template specialization of this method.
1670  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1671  ///
1672  /// This action is *lazy*: upon invocation of this method the calculation is
1673  /// booked but not executed. See RResultPtr documentation.
1674  ///
1675  /// ### Example usage:
1676  /// ~~~{.cpp}
1677  /// // Deduce column type (this invocation needs jitting internally)
1678  /// auto maxVal0 = myDf.Max("values");
1679  /// // Explicit column type
1680  /// auto maxVal1 = myDf.Max<double>("values");
1681  /// ~~~
1682  ///
1683  template <typename T = RDFDetail::RInferredType>
1684  RResultPtr<RDFDetail::MaxReturnType_t<T>> Max(std::string_view columnName = "")
1685  {
1686  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1687  using RetType_t = RDFDetail::MaxReturnType_t<T>;
1688  auto maxV = std::make_shared<RetType_t>(std::numeric_limits<RetType_t>::lowest());
1689  return CreateAction<RDFInternal::ActionTags::Max, T>(userColumns, maxV);
1690  }
1691 
1692  ////////////////////////////////////////////////////////////////////////////
1693  /// \brief Return the mean of processed column values (*lazy action*)
1694  /// \tparam T The type of the branch/column.
1695  /// \param[in] columnName The name of the branch/column to be treated.
1696  /// \return the mean value of the selected column wrapped in a `RResultPtr`.
1697  ///
1698  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1699  /// template specialization of this method.
1700  ///
1701  /// This action is *lazy*: upon invocation of this method the calculation is
1702  /// booked but not executed. See RResultPtr documentation.
1703  ///
1704  /// ### Example usage:
1705  /// ~~~{.cpp}
1706  /// // Deduce column type (this invocation needs jitting internally)
1707  /// auto meanVal0 = myDf.Mean("values");
1708  /// // Explicit column type
1709  /// auto meanVal1 = myDf.Mean<double>("values");
1710  /// ~~~
1711  ///
1712  template <typename T = RDFDetail::RInferredType>
1713  RResultPtr<double> Mean(std::string_view columnName = "")
1714  {
1715  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1716  auto meanV = std::make_shared<double>(0);
1717  return CreateAction<RDFInternal::ActionTags::Mean, T>(userColumns, meanV);
1718  }
1719 
1720  ////////////////////////////////////////////////////////////////////////////
1721  /// \brief Return the unbiased standard deviation of processed column values (*lazy action*)
1722  /// \tparam T The type of the branch/column.
1723  /// \param[in] columnName The name of the branch/column to be treated.
1724  /// \return the standard deviation value of the selected column wrapped in a `RResultPtr`.
1725  ///
1726  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1727  /// template specialization of this method.
1728  ///
1729  /// This action is *lazy*: upon invocation of this method the calculation is
1730  /// booked but not executed. See RResultPtr documentation.
1731  ///
1732  /// ### Example usage:
1733  /// ~~~{.cpp}
1734  /// // Deduce column type (this invocation needs jitting internally)
1735  /// auto stdDev0 = myDf.StdDev("values");
1736  /// // Explicit column type
1737  /// auto stdDev1 = myDf.StdDev<double>("values");
1738  /// ~~~
1739  ///
1740  template <typename T = RDFDetail::RInferredType>
1741  RResultPtr<double> StdDev(std::string_view columnName = "")
1742  {
1743  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1744  auto stdDeviationV = std::make_shared<double>(0);
1745  return CreateAction<RDFInternal::ActionTags::StdDev, T>(userColumns, stdDeviationV);
1746  }
1747 
1748  // clang-format off
1749  ////////////////////////////////////////////////////////////////////////////
1750  /// \brief Return the sum of processed column values (*lazy action*)
1751  /// \tparam T The type of the branch/column.
1752  /// \param[in] columnName The name of the branch/column.
1753  /// \param[in] initValue Optional initial value for the sum. If not present, the column values must be default-constructible.
1754  /// \return the sum of the selected column wrapped in a `RResultPtr`.
1755  ///
1756  /// If T is not specified, RDataFrame will infer it from the data and just-in-time compile the correct
1757  /// template specialization of this method.
1758  /// If the type of the column is inferred, the return type is `double`, the type of the column otherwise.
1759  ///
1760  /// This action is *lazy*: upon invocation of this method the calculation is
1761  /// booked but not executed. See RResultPtr documentation.
1762  ///
1763  /// ### Example usage:
1764  /// ~~~{.cpp}
1765  /// // Deduce column type (this invocation needs jitting internally)
1766  /// auto sum0 = myDf.Sum("values");
1767  /// // Explicit column type
1768  /// auto sum1 = myDf.Sum<double>("values");
1769  /// ~~~
1770  ///
1771  template <typename T = RDFDetail::RInferredType>
1772  RResultPtr<RDFDetail::SumReturnType_t<T>>
1773  Sum(std::string_view columnName = "",
1774  const RDFDetail::SumReturnType_t<T> &initValue = RDFDetail::SumReturnType_t<T>{})
1775  {
1776  const auto userColumns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
1777  auto sumV = std::make_shared<RDFDetail::SumReturnType_t<T>>(initValue);
1778  return CreateAction<RDFInternal::ActionTags::Sum, T>(userColumns, sumV);
1779  }
1780  // clang-format on
1781 
1782  ////////////////////////////////////////////////////////////////////////////
1783  /// \brief Gather filtering statistics
1784  /// \return the resulting `RCutFlowReport` instance wrapped in a `RResultPtr`.
1785  ///
1786  /// Calling `Report` on the main `RDataFrame` object gathers stats for
1787  /// all named filters in the call graph. Calling this method on a
1788  /// stored chain state (i.e. a graph node different from the first) gathers
1789  /// the stats for all named filters in the chain section between the original
1790  /// `RDataFrame` and that node (included). Stats are gathered in the same
1791  /// order as the named filters have been added to the graph.
1792  /// A RResultPtr<RCutFlowReport> is returned to allow inspection of the
1793  /// effects cuts had.
1794  ///
1795  /// This action is *lazy*: upon invocation of
1796  /// this method the calculation is booked but not executed. See RResultPtr
1797  /// documentation.
1798  ///
1799  /// ### Example usage:
1800  /// ~~~{.cpp}
1801  /// auto filtered = d.Filter(cut1, {"b1"}, "Cut1").Filter(cut2, {"b2"}, "Cut2");
1802  /// auto cutReport = filtered3.Report();
1803  /// cutReport->Print();
1804  /// ~~~
1805  ///
1806  RResultPtr<RCutFlowReport> Report()
1807  {
1808  bool returnEmptyReport = false;
1809  // if this is a RInterface<RLoopManager> on which `Define` has been called, users
1810  // are calling `Report` on a chain of the form LoopManager->Define->Define->..., which
1811  // certainly does not contain named filters.
1812  // The number 4 takes into account the implicit columns for entry and slot number
1813  // and their aliases (2 + 2, i.e. {r,t}dfentry_ and {r,t}dfslot_)
1814  if (std::is_same<Proxied, RLoopManager>::value && fCustomColumns.GetNames().size() > 4)
1815  returnEmptyReport = true;
1816 
1817  auto rep = std::make_shared<RCutFlowReport>();
1818  using Helper_t = RDFInternal::ReportHelper<Proxied>;
1819  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
1820 
1821  auto action = std::make_unique<Action_t>(Helper_t(rep, fProxiedPtr, returnEmptyReport), ColumnNames_t({}),
1822  fProxiedPtr, RDFInternal::RBookedCustomColumns(fCustomColumns));
1823 
1824  fLoopManager->Book(action.get());
1825  return MakeResultPtr(rep, *fLoopManager, std::move(action));
1826  }
1827 
1828  /////////////////////////////////////////////////////////////////////////////
1829  /// \brief Returns the names of the available columns
1830  /// \return the container of column names.
1831  ///
1832  /// This is not an action nor a transformation, just a query to the RDataFrame object.
1833  ///
1834  /// ### Example usage:
1835  /// ~~~{.cpp}
1836  /// auto colNames = d.GetColumnNames();
1837  /// // Print columns' names
1838  /// for (auto &&colName : colNames) std::cout << colName << std::endl;
1839  /// ~~~
1840  ///
1841  ColumnNames_t GetColumnNames()
1842  {
1843  ColumnNames_t allColumns;
1844 
1845  auto addIfNotInternal = [&allColumns](std::string_view colName) {
1846  if (!RDFInternal::IsInternalColumn(colName))
1847  allColumns.emplace_back(colName);
1848  };
1849 
1850  auto columnNames = fCustomColumns.GetNames();
1851 
1852  std::for_each(columnNames.begin(), columnNames.end(), addIfNotInternal);
1853 
1854  auto tree = fLoopManager->GetTree();
1855  if (tree) {
1856  auto branchNames = RDFInternal::GetBranchNames(*tree, /*allowDuplicates=*/false);
1857  allColumns.insert(allColumns.end(), branchNames.begin(), branchNames.end());
1858  }
1859 
1860  if (fDataSource) {
1861  auto &dsColNames = fDataSource->GetColumnNames();
1862  allColumns.insert(allColumns.end(), dsColNames.begin(), dsColNames.end());
1863  }
1864 
1865  return allColumns;
1866  }
1867 
1868  /////////////////////////////////////////////////////////////////////////////
1869  /// \brief Return the type of a given column as a string.
1870  /// \return the type of the required column.
1871  ///
1872  /// This is not an action nor a transformation, just a query to the RDataFrame object.
1873  ///
1874  /// ### Example usage:
1875  /// ~~~{.cpp}
1876  /// auto colType = d.GetColumnType("columnName");
1877  /// // Print column type
1878  /// std::cout << "Column " << colType << " has type " << colType << std::endl;
1879  /// ~~~
1880  ///
1881  std::string GetColumnType(std::string_view column)
1882  {
1883  const auto &customCols = fCustomColumns.GetNames();
1884  const bool convertVector2RVec = true;
1885  const auto isCustom = std::find(customCols.begin(), customCols.end(), column) != customCols.end();
1886  if (!isCustom) {
1887  return RDFInternal::ColumnName2ColumnTypeName(std::string(column), fLoopManager->GetID(),
1888  fLoopManager->GetTree(), fLoopManager->GetDataSource(), isCustom,
1889  convertVector2RVec);
1890  } else {
1891  // must convert the alias "__rdf::column_type" to a readable type
1892  const auto colID = std::to_string(fCustomColumns.GetColumns().at(std::string(column))->GetID());
1893  const auto call = "ROOT::Internal::RDF::TypeID2TypeName(typeid(__rdf" + std::to_string(fLoopManager->GetID()) +
1894  "::" + std::string(column) + colID + "_type))";
1895  fLoopManager->JitDeclarations(); // some type aliases might be needed by the code jitted in the next line
1896  const auto calcRes = RDFInternal::InterpreterCalc(call);
1897  return *reinterpret_cast<std::string *>(calcRes); // copy result to stack
1898  }
1899  }
1900 
1901  /// \brief Returns the names of the filters created.
1902  /// \return the container of filters names.
1903  ///
1904  /// If called on a root node, all the filters in the computation graph will
1905  /// be printed. For any other node, only the filters upstream of that node.
1906  /// Filters without a name are printed as "Unnamed Filter"
1907  /// This is not an action nor a transformation, just a query to the RDataFrame object.
1908  ///
1909  /// ### Example usage:
1910  /// ~~~{.cpp}
1911  /// auto filtNames = d.GetFilterNames();
1912  /// for (auto &&filtName : filtNames) std::cout << filtName << std::endl;
1913  /// ~~~
1914  ///
1915  std::vector<std::string> GetFilterNames() { return RDFInternal::GetFilterNames(fProxiedPtr); }
1916 
1917  /// \brief Returns the names of the defined columns
1918  /// \return the container of the defined column names.
1919  ///
1920  /// This is not an action nor a transformation, just a simple utility to
1921  /// get the columns names that have been defined up to the node.
1922  /// If no custom column has been defined, e.g. on a root node, it returns an
1923  /// empty collection.
1924  ///
1925  /// ### Example usage:
1926  /// ~~~{.cpp}
1927  /// auto defColNames = d.GetDefinedColumnNames();
1928  /// // Print defined columns' names
1929  /// for (auto &&defColName : defColNames) std::cout << defColName << std::endl;
1930  /// ~~~
1931  ///
1932  ColumnNames_t GetDefinedColumnNames()
1933  {
1934  ColumnNames_t definedColumns;
1935 
1936  auto columns = fCustomColumns.GetColumns();
1937 
1938  for (auto column : columns) {
1939  if (!RDFInternal::IsInternalColumn(column.first) && !column.second->IsDataSourceColumn())
1940  definedColumns.emplace_back(column.first);
1941  }
1942 
1943  return definedColumns;
1944  }
1945 
1946  /// \brief Checks if a column is present in the dataset
1947  /// \return true if the column is available, false otherwise
1948  ///
1949  /// This method checks if a column is part of the input ROOT dataset, has
1950  /// been defined or can be provided by the data source.
1951  ///
1952  /// Example usage:
1953  /// ~~~{.cpp}
1954  /// ROOT::RDataFrame base(1);
1955  /// auto rdf = base.Define("definedColumn", [](){return 0;});
1956  /// rdf.HasColumn("definedColumn"); // true: we defined it
1957  /// rdf.HasColumn("rdfentry_"); // true: it's always there
1958  /// rdf.HasColumn("foo"); // false: it is not there
1959  /// ~~~
1960  bool HasColumn(std::string_view columnName)
1961  {
1962  if (fCustomColumns.HasName(columnName))
1963  return true;
1964 
1965  if (auto tree = fLoopManager->GetTree()) {
1966  const auto &branchNames = fLoopManager->GetBranchNames();
1967  const auto branchNamesEnd = branchNames.end();
1968  if (branchNamesEnd != std::find(branchNames.begin(), branchNamesEnd, columnName))
1969  return true;
1970  }
1971 
1972  if (fDataSource && fDataSource->HasColumn(columnName))
1973  return true;
1974 
1975  return false;
1976  }
1977 
1978  /// \brief Gets the number of data processing slots
1979  /// \return The number of data processing slots used by this RDataFrame instance
1980  ///
1981  /// This method returns the number of data processing slots used by this RDataFrame
1982  /// instance. This number is influenced by the global switch ROOT::EnableImplicitMT().
1983  ///
1984  /// Example usage:
1985  /// ~~~{.cpp}
1986  /// ROOT::EnableImplicitMT(6)
1987  /// ROOT::RDataFrame df(1);
1988  /// std::cout << df.GetNSlots() << std::endl; // prints "6"
1989  /// ~~~
1990  unsigned int GetNSlots() const { return fLoopManager->GetNSlots(); }
1991 
1992  // clang-format off
1993  ////////////////////////////////////////////////////////////////////////////
1994  /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
1995  /// \tparam F The type of the aggregator callable. Automatically deduced.
1996  /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
1997  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
1998  /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U&,T)`, where T is the type of the column, U is the type of the aggregator variable
1999  /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2000  /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2001  /// \param[in] aggIdentity The aggregator variable of each thread is initialised to this value (or is default-constructed if the parameter is omitted)
2002  /// \return the result of the aggregation wrapped in a `RResultPtr`.
2003  ///
2004  /// An aggregator callable takes two values, an aggregator variable and a column value. The aggregator variable is
2005  /// initialized to aggIdentity or default-constructed if aggIdentity is omitted.
2006  /// This action calls the aggregator callable for each processed entry, passing in the aggregator variable and
2007  /// the value of the column columnName.
2008  /// If the signature is `U(U,T)` the aggregator variable is then copy-assigned the result of the execution of the callable.
2009  /// Otherwise the signature of aggregator must be `void(U&,T)`.
2010  ///
2011  /// The merger callable is used to merge the partial accumulation results of each processing thread. It is only called in multi-thread executions.
2012  /// If its signature is `U(U,U)` the aggregator variables of each thread are merged two by two.
2013  /// If its signature is `void(std::vector<U>& a)` it is assumed that it merges all aggregators in a[0].
2014  ///
2015  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2016  ///
2017  /// Example usage:
2018  /// ~~~{.cpp}
2019  /// auto aggregator = [](double acc, double x) { return acc * x; };
2020  /// ROOT::EnableImplicitMT();
2021  /// // If multithread is enabled, the aggregator function will be called by more threads
2022  /// // and will produce a vector of partial accumulators.
2023  /// // The merger function performs the final aggregation of these partial results.
2024  /// auto merger = [](std::vector<double> &accumulators) {
2025  /// for (auto i : ROOT::TSeqU(1u, accumulators.size())) {
2026  /// accumulators[0] *= accumulators[i];
2027  /// }
2028  /// };
2029  ///
2030  /// // The accumulator is initialized at this value by every thread.
2031  /// double initValue = 1.;
2032  ///
2033  /// // Multiplies all elements of the column "x"
2034  /// auto result = d.Aggregate(aggregator, merger, columnName, initValue);
2035  /// ~~~
2036  // clang-format on
2037  template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2038  typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2039  typename ArgTypesNoDecay = typename TTraits::CallableTraits<AccFun>::arg_types_nodecay,
2040  typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2041  typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2042  RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName, const U &aggIdentity)
2043  {
2044  RDFInternal::CheckAggregate<R, MergeFun>(ArgTypesNoDecay());
2045  const auto columns = columnName.empty() ? ColumnNames_t() : ColumnNames_t({std::string(columnName)});
2046  constexpr auto nColumns = ArgTypes::list_size;
2047 
2048  const auto validColumnNames = GetValidatedColumnNames(1, columns);
2049 
2050  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ArgTypes());
2051 
2052  auto accObjPtr = std::make_shared<U>(aggIdentity);
2053  using Helper_t = RDFInternal::AggregateHelper<AccFun, MergeFun, R, T, U>;
2054  using Action_t = typename RDFInternal::RAction<Helper_t, Proxied>;
2055  auto action = std::make_unique<Action_t>(
2056  Helper_t(std::move(aggregator), std::move(merger), accObjPtr, fLoopManager->GetNSlots()), validColumnNames,
2057  fProxiedPtr, std::move(newColumns));
2058  fLoopManager->Book(action.get());
2059  return MakeResultPtr(accObjPtr, *fLoopManager, std::move(action));
2060  }
2061 
2062  // clang-format off
2063  ////////////////////////////////////////////////////////////////////////////
2064  /// \brief Execute a user-defined accumulation operation on the processed column values in each processing slot
2065  /// \tparam F The type of the aggregator callable. Automatically deduced.
2066  /// \tparam U The type of the aggregator variable. Must be default-constructible, copy-constructible and copy-assignable. Automatically deduced.
2067  /// \tparam T The type of the column to apply the reduction to. Automatically deduced.
2068  /// \param[in] aggregator A callable with signature `U(U,T)` or `void(U,T)`, where T is the type of the column, U is the type of the aggregator variable
2069  /// \param[in] merger A callable with signature `U(U,U)` or `void(std::vector<U>&)` used to merge the results of the accumulations of each thread
2070  /// \param[in] columnName The column to be aggregated. If omitted, the first default column is used instead.
2071  /// \return the result of the aggregation wrapped in a `RResultPtr`.
2072  ///
2073  /// See previous Aggregate overload for more information.
2074  // clang-format on
2075  template <typename AccFun, typename MergeFun, typename R = typename TTraits::CallableTraits<AccFun>::ret_type,
2076  typename ArgTypes = typename TTraits::CallableTraits<AccFun>::arg_types,
2077  typename U = TTraits::TakeFirstParameter_t<ArgTypes>,
2078  typename T = TTraits::TakeFirstParameter_t<TTraits::RemoveFirstParameter_t<ArgTypes>>>
2079  RResultPtr<U> Aggregate(AccFun aggregator, MergeFun merger, std::string_view columnName = "")
2080  {
2081  static_assert(
2082  std::is_default_constructible<U>::value,
2083  "aggregated object cannot be default-constructed. Please provide an initialisation value (aggIdentity)");
2084  return Aggregate(std::move(aggregator), std::move(merger), columnName, U());
2085  }
2086 
2087  // clang-format off
2088  ////////////////////////////////////////////////////////////////////////////
2089  /// \brief Book execution of a custom action using a user-defined helper object.
2090  /// \tparam ColumnTypes List of types of columns used by this action.
2091  /// \tparam Helper The type of the user-defined helper. See below for the required interface it should expose.
2092  /// \param[in] helper The Action Helper to be scheduled.
2093  /// \param[in] columns The names of the columns on which the helper acts.
2094  /// \return the result of the helper wrapped in a `RResultPtr`.
2095  ///
2096  /// This method books a custom action for execution. The behavior of the action is completely dependent on the
2097  /// Helper object provided by the caller. The minimum required interface for the helper is the following (more
2098  /// methods can be present, e.g. a constructor that takes the number of worker threads is usually useful):
2099  ///
2100  /// * Helper must publicly inherit from ROOT::Detail::RDF::RActionImpl<Helper>
2101  /// * Helper(Helper &&): a move-constructor is required. Copy-constructors are discouraged.
2102  /// * Result_t: alias for the type of the result of this action helper. Must be default-constructible.
2103  /// * void Exec(unsigned int slot, ColumnTypes...columnValues): each working thread shall call this method
2104  /// during the event-loop, possibly concurrently. No two threads will ever call Exec with the same 'slot' value:
2105  /// this parameter is there to facilitate writing thread-safe helpers. The other arguments will be the values of
2106  /// the requested columns for the particular entry being processed.
2107  /// * void InitTask(TTreeReader *, unsigned int slot): each working thread shall call this method during the event
2108  /// loop, before processing a batch of entries (possibly read from the TTreeReader passed as argument, if not null).
2109  /// This method can be used e.g. to prepare the helper to process a batch of entries in a given thread. Can be no-op.
2110  /// * void Initialize(): this method is called once before starting the event-loop. Useful for setup operations. Can be no-op.
2111  /// * void Finalize(): this method is called at the end of the event loop. Commonly used to finalize the contents of the result.
2112  /// * Result_t &PartialUpdate(unsigned int slot): this method is optional, i.e. can be omitted. If present, it should
2113  /// return the value of the partial result of this action for the given 'slot'. Different threads might call this
2114  /// method concurrently, but will always pass different 'slot' numbers.
2115  /// * std::shared_ptr<Result_t> GetResultPtr() const: return a shared_ptr to the result of this action (of type
2116  /// Result_t). The RResultPtr returned by Book will point to this object.
2117  ///
2118  /// See ActionHelpers.hxx for the helpers used by standard RDF actions.
2119  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2120  // clang-format on
2121  template <typename... ColumnTypes, typename Helper>
2122  RResultPtr<typename Helper::Result_t> Book(Helper &&helper, const ColumnNames_t &columns = {})
2123  {
2124  constexpr auto nColumns = sizeof...(ColumnTypes);
2125  RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columns.size());
2126 
2127  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2128 
2129  // TODO add more static sanity checks on Helper
2130  using AH = RDFDetail::RActionImpl<Helper>;
2131  static_assert(std::is_base_of<AH, Helper>::value && std::is_convertible<Helper *, AH *>::value,
2132  "Action helper of type T must publicly inherit from ROOT::Detail::RDF::RActionImpl<T>");
2133 
2134  using Action_t = typename RDFInternal::RAction<Helper, Proxied, TTraits::TypeList<ColumnTypes...>>;
2135  auto resPtr = helper.GetResultPtr();
2136 
2137  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2138  RDFInternal::TypeList<ColumnTypes...>());
2139 
2140  auto action = std::make_unique<Action_t>(Helper(std::forward<Helper>(helper)), validColumnNames, fProxiedPtr,
2141  RDFInternal::RBookedCustomColumns(newColumns));
2142  fLoopManager->Book(action.get());
2143  return MakeResultPtr(resPtr, *fLoopManager, std::move(action));
2144  }
2145 
2146  ////////////////////////////////////////////////////////////////////////////
2147  /// \brief Provides a representation of the columns in the dataset
2148  /// \tparam ColumnTypes variadic list of branch/column types.
2149  /// \param[in] columnList Names of the columns to be displayed.
2150  /// \param[in] rows Number of events for each column to be displayed.
2151  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2152  ///
2153  /// This function returns a `RResultPtr<RDisplay>` containing all the entries to be displayed, organized in a tabular
2154  /// form. RDisplay will either print on the standard output a summarized version through `Print()` or will return a
2155  /// complete version through `AsString()`.
2156  ///
2157  /// This action is *lazy*: upon invocation of this method the calculation is booked but not executed. See RResultPtr documentation.
2158  ///
2159  /// Example usage:
2160  /// ~~~{.cpp}
2161  /// // Preparing the RResultPtr<RDisplay> object with all columns and default number of entries
2162  /// auto d1 = rdf.Display("");
2163  /// // Preparing the RResultPtr<RDisplay> object with two columns and 128 entries
2164  /// auto d2 = d.Display({"x", "y"}, 128);
2165  /// // Printing the short representations, the event loop will run
2166  /// d1->Print();
2167  /// d2->Print();
2168  /// ~~~
2169  template <typename... ColumnTypes>
2170  RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2171  {
2172  CheckIMTDisabled("Display");
2173 
2174  auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2175  return CreateAction<RDFInternal::ActionTags::Display, ColumnTypes...>(columnList, displayer);
2176  }
2177 
2178  ////////////////////////////////////////////////////////////////////////////
2179  /// \brief Provides a representation of the columns in the dataset
2180  /// \param[in] columnList Names of the columns to be displayed.
2181  /// \param[in] rows Number of events for each column to be displayed.
2182  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2183  ///
2184  /// This overload automatically infers the column types.
2185  /// See the previous overloads for further details.
2186  RResultPtr<RDisplay> Display(const ColumnNames_t &columnList, const int &nRows = 5)
2187  {
2188  CheckIMTDisabled("Display");
2189  auto displayer = std::make_shared<RDFInternal::RDisplay>(columnList, GetColumnTypeNamesList(columnList), nRows);
2190  return CreateAction<RDFInternal::ActionTags::Display, RDFDetail::RInferredType>(columnList, displayer,
2191  columnList.size());
2192  }
2193 
2194  ////////////////////////////////////////////////////////////////////////////
2195  /// \brief Provides a representation of the columns in the dataset
2196  /// \param[in] columnNameRegexp A regular expression to select the columns.
2197  /// \param[in] rows Number of events for each column to be displayed.
2198  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2199  ///
2200  /// The existing columns are matched against the regular expression. If the string provided
2201  /// is empty, all columns are selected.
2202  /// See the previous overloads for further details.
2203  RResultPtr<RDisplay> Display(std::string_view columnNameRegexp = "", const int &nRows = 5)
2204  {
2205  auto selectedColumns = RDFInternal::ConvertRegexToColumns(fCustomColumns, fLoopManager->GetTree(), fDataSource,
2206  columnNameRegexp, "Display");
2207  return Display(selectedColumns, nRows);
2208  }
2209 
2210  ////////////////////////////////////////////////////////////////////////////
2211  /// \brief Provides a representation of the columns in the dataset
2212  /// \param[in] columnList Names of the columns to be displayed.
2213  /// \param[in] nRows Number of events for each column to be displayed.
2214  /// \return the `RDisplay` instance wrapped in a `RResultPtr`.
2215  ///
2216  /// See the previous overloads for further details.
2217  RResultPtr<RDisplay> Display(std::initializer_list<std::string> columnList, const int &nRows = 5)
2218  {
2219  ColumnNames_t selectedColumns(columnList);
2220  return Display(selectedColumns, nRows);
2221  }
2222 
2223 private:
2224  void AddDefaultColumns()
2225  {
2226  RDFInternal::RBookedCustomColumns newCols;
2227 
2228  // Entry number column
2229  const auto entryColName = "rdfentry_";
2230  auto entryColGen = [](unsigned int, ULong64_t entry) { return entry; };
2231  using NewColEntry_t =
2232  RDFDetail::RCustomColumn<decltype(entryColGen), RDFDetail::CustomColExtraArgs::SlotAndEntry>;
2233 
2234  auto entryColumn = std::make_shared<NewColEntry_t>(fLoopManager, entryColName, std::move(entryColGen),
2235  ColumnNames_t{}, fLoopManager->GetNSlots(), newCols);
2236  newCols.AddName(entryColName);
2237  newCols.AddColumn(entryColumn, entryColName);
2238 
2239  fLoopManager->RegisterCustomColumn(entryColumn.get());
2240 
2241  // Declare return type to the interpreter, for future use by jitted actions
2242  auto retTypeDeclaration = "namespace __rdf" + std::to_string(fLoopManager->GetID()) + " { using " + entryColName +
2243  std::to_string(entryColumn->GetID()) + "_type = ULong64_t; }";
2244  fLoopManager->ToJitDeclare(retTypeDeclaration);
2245 
2246  // Slot number column
2247  const auto slotColName = "rdfslot_";
2248  auto slotColGen = [](unsigned int slot) { return slot; };
2249  using NewColSlot_t = RDFDetail::RCustomColumn<decltype(slotColGen), RDFDetail::CustomColExtraArgs::Slot>;
2250 
2251  auto slotColumn = std::make_shared<NewColSlot_t>(fLoopManager, slotColName, std::move(slotColGen),
2252  ColumnNames_t{}, fLoopManager->GetNSlots(), newCols);
2253 
2254  newCols.AddName(slotColName);
2255  newCols.AddColumn(slotColumn, slotColName);
2256 
2257  fLoopManager->RegisterCustomColumn(slotColumn.get());
2258 
2259  fCustomColumns = std::move(newCols);
2260 
2261  // Declare return type to the interpreter, for future use by jitted actions
2262  retTypeDeclaration = "namespace __rdf" + std::to_string(fLoopManager->GetID()) + " { using " + slotColName +
2263  std::to_string(slotColumn->GetID()) + "_type = unsigned int; }";
2264  fLoopManager->ToJitDeclare(retTypeDeclaration);
2265 
2266  fLoopManager->AddColumnAlias("tdfentry_", entryColName);
2267  fCustomColumns.AddName("tdfentry_");
2268  fLoopManager->AddColumnAlias("tdfslot_", slotColName);
2269  fCustomColumns.AddName("tdfslot_");
2270  }
2271 
2272  std::vector<std::string> GetColumnTypeNamesList(const ColumnNames_t &columnList)
2273  {
2274  std::vector<std::string> types;
2275 
2276  for (auto column : columnList) {
2277  types.push_back(GetColumnType(column));
2278  }
2279  return types;
2280  }
2281 
2282  void CheckIMTDisabled(std::string_view callerName)
2283  {
2284  if (ROOT::IsImplicitMTEnabled()) {
2285  std::string error(callerName);
2286  error += " was called with ImplicitMT enabled, but multi-thread is not supported.";
2287  throw std::runtime_error(error);
2288  }
2289  }
2290 
2291  // Type was specified by the user, no need to infer it
2292  template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2293  typename std::enable_if<!RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2294  RResultPtr<ActionResultType> CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r)
2295  {
2296  constexpr auto nColumns = sizeof...(BranchTypes);
2297 
2298  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2299 
2300  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(),
2301  RDFInternal::TypeList<BranchTypes...>());
2302 
2303  const auto nSlots = fLoopManager->GetNSlots();
2304 
2305  auto action = RDFInternal::BuildAction<BranchTypes...>(validColumnNames, r, nSlots, fProxiedPtr, ActionTag{},
2306  std::move(newColumns));
2307  fLoopManager->Book(action.get());
2308  return MakeResultPtr(r, *fLoopManager, std::move(action));
2309  }
2310 
2311  // User did not specify type, do type inference
2312  // This version of CreateAction has a `nColumns` optional argument. If present, the number of required columns for
2313  // this action is taken equal to nColumns, otherwise it is assumed to be sizeof...(BranchTypes)
2314  template <typename ActionTag, typename... BranchTypes, typename ActionResultType,
2315  typename std::enable_if<RDFInternal::TNeedJitting<BranchTypes...>::value, int>::type = 0>
2316  RResultPtr<ActionResultType>
2317  CreateAction(const ColumnNames_t &columns, const std::shared_ptr<ActionResultType> &r, const int nColumns = -1)
2318  {
2319  auto realNColumns = (nColumns > -1 ? nColumns : sizeof...(BranchTypes));
2320 
2321  const auto validColumnNames = GetValidatedColumnNames(realNColumns, columns);
2322  const unsigned int nSlots = fLoopManager->GetNSlots();
2323 
2324  auto tree = fLoopManager->GetTree();
2325  auto rOnHeap = RDFInternal::MakeSharedOnHeap(r);
2326 
2327  auto upcastNodeOnHeap = RDFInternal::MakeSharedOnHeap(RDFInternal::UpcastNode(fProxiedPtr));
2328  using BaseNodeType_t = typename std::remove_pointer<decltype(upcastNodeOnHeap)>::type::element_type;
2329  RInterface<BaseNodeType_t> upcastInterface(*upcastNodeOnHeap, *fLoopManager, fCustomColumns, fDataSource);
2330 
2331  auto jittedActionOnHeap =
2332  RDFInternal::MakeSharedOnHeap(std::make_shared<RDFInternal::RJittedAction>(*fLoopManager));
2333 
2334  auto toJit = RDFInternal::JitBuildAction(
2335  validColumnNames, upcastNodeOnHeap, typeid(std::shared_ptr<ActionResultType>), typeid(ActionTag), rOnHeap,
2336  tree, nSlots, fCustomColumns, fDataSource, jittedActionOnHeap, fLoopManager->GetID());
2337  fLoopManager->Book(jittedActionOnHeap->get());
2338  fLoopManager->ToJitExec(toJit);
2339  return MakeResultPtr(r, *fLoopManager, *jittedActionOnHeap);
2340  }
2341 
2342  template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type>
2343  typename std::enable_if<std::is_default_constructible<RetType>::value, RInterface<Proxied, DS_t>>::type
2344  DefineImpl(std::string_view name, F &&expression, const ColumnNames_t &columns)
2345  {
2346  RDFInternal::CheckCustomColumn(name, fLoopManager->GetTree(), fCustomColumns.GetNames(),
2347  fLoopManager->GetAliasMap(),
2348  fDataSource ? fDataSource->GetColumnNames() : ColumnNames_t{});
2349 
2350  using ArgTypes_t = typename TTraits::CallableTraits<F>::arg_types;
2351  using ColTypesTmp_t = typename RDFInternal::RemoveFirstParameterIf<
2352  std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::Slot>::value, ArgTypes_t>::type;
2353  using ColTypes_t = typename RDFInternal::RemoveFirstTwoParametersIf<
2354  std::is_same<CustomColumnType, RDFDetail::CustomColExtraArgs::SlotAndEntry>::value, ColTypesTmp_t>::type;
2355 
2356  constexpr auto nColumns = ColTypes_t::list_size;
2357 
2358  const auto validColumnNames = GetValidatedColumnNames(nColumns, columns);
2359 
2360  auto newColumns = CheckAndFillDSColumns(validColumnNames, std::make_index_sequence<nColumns>(), ColTypes_t());
2361 
2362  using NewCol_t = RDFDetail::RCustomColumn<F, CustomColumnType>;
2363  RDFInternal::RBookedCustomColumns newCols(newColumns);
2364  auto newColumn = std::make_shared<NewCol_t>(fLoopManager, name, std::forward<F>(expression), validColumnNames,
2365  fLoopManager->GetNSlots(), newCols);
2366 
2367  // Declare return type to the interpreter, for future use by jitted actions
2368  auto retTypeName = RDFInternal::TypeID2TypeName(typeid(RetType));
2369  if (retTypeName.empty()) {
2370  // The type is not known to the interpreter.
2371  // Forward-declare it as void + helpful comment, so that if this Define'd quantity is
2372  // ever used by jitted code users will have some way to know what went wrong
2373  const auto demangledType = RDFInternal::DemangleTypeIdName(typeid(RetType));
2374  retTypeName = "void /* The type of column \"" + std::string(name) + "\" (" + demangledType +
2375  ") is not known to the interpreter. */";
2376  }
2377  const auto retTypeDeclaration = "namespace __rdf" + std::to_string(fLoopManager->GetID()) +
2378  " { " + +" using " + std::string(name) + std::to_string(newColumn->GetID()) +
2379  "_type = " + retTypeName + "; }";
2380  fLoopManager->ToJitDeclare(retTypeDeclaration);
2381 
2382  fLoopManager->RegisterCustomColumn(newColumn.get());
2383  newCols.AddName(name);
2384  newCols.AddColumn(newColumn, name);
2385 
2386  RInterface<Proxied> newInterface(fProxiedPtr, *fLoopManager, std::move(newCols), fDataSource);
2387 
2388  return newInterface;
2389  }
2390 
2391  // This overload is chosen when the callable passed to Define or DefineSlot returns void.
2392  // It simply fires a compile-time error. This is preferable to a static_assert in the main `Define` overload because
2393  // this way compilation of `Define` has no way to continue after throwing the error.
2394  template <typename F, typename CustomColumnType, typename RetType = typename TTraits::CallableTraits<F>::ret_type,
2395  bool IsFStringConv = std::is_convertible<F, std::string>::value,
2396  bool IsRetTypeDefConstr = std::is_default_constructible<RetType>::value>
2397  typename std::enable_if<!IsFStringConv && !IsRetTypeDefConstr, RInterface<Proxied, DS_t>>::type
2398  DefineImpl(std::string_view, F, const ColumnNames_t &)
2399  {
2400  static_assert(std::is_default_constructible<typename TTraits::CallableTraits<F>::ret_type>::value,
2401  "Error in `Define`: type returned by expression is not default-constructible");
2402  return *this; // never reached
2403  }
2404 
2405  ////////////////////////////////////////////////////////////////////////////
2406  /// \brief Implementation of snapshot
2407  /// \param[in] treename The name of the TTree
2408  /// \param[in] filename The name of the TFile
2409  /// \param[in] columnList The list of names of the branches to be written
2410  /// The implementation exploits Foreach. The association of the addresses to
2411  /// the branches takes place at the first event. This is possible because
2412  /// since there are no copies, the address of the value passed by reference
2413  /// is the address pointing to the storage of the read/created object in/by
2414  /// the TTreeReaderValue/TemporaryBranch
2415  template <typename... ColumnTypes>
2416  RResultPtr<RInterface<RLoopManager>> SnapshotImpl(std::string_view treename, std::string_view filename,
2417  const ColumnNames_t &columnList, const RSnapshotOptions &options)
2418  {
2419  RDFInternal::CheckTypesAndPars(sizeof...(ColumnTypes), columnList.size());
2420 
2421  const auto validCols = GetValidatedColumnNames(columnList.size(), columnList);
2422 
2423  auto newColumns = CheckAndFillDSColumns(validCols, std::index_sequence_for<ColumnTypes...>(),
2424  TTraits::TypeList<ColumnTypes...>());
2425 
2426  const std::string fullTreename(treename);
2427  // split name into directory and treename if needed
2428  const auto lastSlash = treename.rfind('/');
2429  std::string_view dirname = "";
2430  if (std::string_view::npos != lastSlash) {
2431  dirname = treename.substr(0, lastSlash);
2432  treename = treename.substr(lastSlash + 1, treename.size());
2433  }
2434 
2435  // add action node to functional graph and run event loop
2436  std::unique_ptr<RDFInternal::RActionBase> actionPtr;
2437  if (!ROOT::IsImplicitMTEnabled()) {
2438  // single-thread snapshot
2439  using Helper_t = RDFInternal::SnapshotHelper<ColumnTypes...>;
2440  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2441  actionPtr.reset(new Action_t(Helper_t(filename, dirname, treename, validCols, columnList, options), validCols,
2442  fProxiedPtr, std::move(newColumns)));
2443  } else {
2444  // multi-thread snapshot
2445  using Helper_t = RDFInternal::SnapshotHelperMT<ColumnTypes...>;
2446  using Action_t = RDFInternal::RAction<Helper_t, Proxied>;
2447  actionPtr.reset(new Action_t(
2448  Helper_t(fLoopManager->GetNSlots(), filename, dirname, treename, validCols, columnList, options), validCols,
2449  fProxiedPtr, std::move(newColumns)));
2450  }
2451 
2452  fLoopManager->Book(actionPtr.get());
2453 
2454  return RDFInternal::CreateSnapshotRDF(validCols, fullTreename, filename, options.fLazy, *fLoopManager,
2455  std::move(actionPtr));
2456  }
2457 
2458  ////////////////////////////////////////////////////////////////////////////
2459  /// \brief Implementation of cache
2460  template <typename... BranchTypes, std::size_t... S>
2461  RInterface<RLoopManager> CacheImpl(const ColumnNames_t &columnList, std::index_sequence<S...> s)
2462  {
2463  // Check at compile time that the columns types are copy constructible
2464  constexpr bool areCopyConstructible =
2465  RDFInternal::TEvalAnd<std::is_copy_constructible<BranchTypes>::value...>::value;
2466  static_assert(areCopyConstructible, "Columns of a type which is not copy constructible cannot be cached yet.");
2467 
2468  // We share bits and pieces with snapshot. De facto this is a snapshot
2469  // in memory!
2470  RDFInternal::CheckTypesAndPars(sizeof...(BranchTypes), columnList.size());
2471 
2472  auto colHolders = std::make_tuple(Take<BranchTypes>(columnList[S])...);
2473  auto ds = std::make_unique<RLazyDS<BranchTypes...>>(std::make_pair(columnList[S], std::get<S>(colHolders))...);
2474 
2475  RInterface<RLoopManager> cachedRDF(std::make_shared<RLoopManager>(std::move(ds), columnList));
2476 
2477  (void)s; // Prevents unused warning
2478 
2479  return cachedRDF;
2480  }
2481 
2482 protected:
2483  RInterface(const std::shared_ptr<Proxied> &proxied, RLoopManager &lm,
2484  const RDFInternal::RBookedCustomColumns &columns, RDataSource *ds)
2485  : fProxiedPtr(proxied), fLoopManager(&lm), fDataSource(ds), fCustomColumns(columns)
2486  {
2487  }
2488 
2489  RLoopManager *GetLoopManager() const { return fLoopManager; }
2490 
2491  const std::shared_ptr<Proxied> &GetProxiedPtr() const { return fProxiedPtr; }
2492 
2493  /// Prepare the call to the GetValidatedColumnNames routine, making sure that GetBranchNames,
2494  /// which is expensive in terms of runtime, is called at most once.
2495  ColumnNames_t GetValidatedColumnNames(const unsigned int nColumns, const ColumnNames_t &columns)
2496  {
2497  return RDFInternal::GetValidatedColumnNames(*fLoopManager, nColumns, columns, fCustomColumns.GetNames(),
2498  fDataSource);
2499  }
2500 
2501  template <typename... ColumnTypes, std::size_t... S>
2502  RDFInternal::RBookedCustomColumns
2503  CheckAndFillDSColumns(ColumnNames_t validCols, std::index_sequence<S...>, TTraits::TypeList<ColumnTypes...>)
2504  {
2505  return fDataSource
2506  ? RDFInternal::AddDSColumns(*fLoopManager, validCols, fCustomColumns, *fDataSource,
2507  fLoopManager->GetNSlots(), std::index_sequence_for<ColumnTypes...>(),
2508  TTraits::TypeList<ColumnTypes...>())
2509  : fCustomColumns;
2510  }
2511 };
2512 
2513 } // end NS RDF
2514 
2515 } // namespace ROOT
2516 
2517 #endif // ROOT_RDF_INTERFACE