Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RResultPtr.hxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RRESULTPTR
12 #define ROOT_RRESULTPTR
13 
#include "ROOT/RDF/RActionBase.hxx"
#include "ROOT/TypeTraits.hxx"
#include "TError.h" // Warning

#include <functional>
#include <memory>
#include <stdexcept>
21 
22 namespace ROOT {
23 namespace Internal {
24 namespace RDF {
25 class GraphCreatorHelper;
26 }
27 }
28 }
29 
30 namespace ROOT {
31 namespace RDF {
32 // Fwd decl for MakeResultPtr
33 template <typename T>
34 class RResultPtr;
35 
36 } // ns RDF
37 
38 namespace Detail {
39 namespace RDF {
40 using ROOT::RDF::RResultPtr;
41 // Fwd decl for RResultPtr
42 template <typename T>
43 RResultPtr<T> MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &df,
44  std::shared_ptr<ROOT::Internal::RDF::RActionBase> actionPtr);
45 } // ns RDF
46 } // ns Detail
47 namespace RDF {
48 namespace RDFInternal = ROOT::Internal::RDF;
49 namespace RDFDetail = ROOT::Detail::RDF;
50 namespace TTraits = ROOT::TypeTraits;
51 
52 /// Smart pointer for the return type of actions
53 /**
54 \class ROOT::RDF::RResultPtr
55 \ingroup dataframe
56 \brief A wrapper around the result of RDataFrame actions able to trigger calculations lazily.
57 \tparam T Type of the action result
58 
59 A smart pointer which allows to access the result of a RDataFrame action. The
60 methods of the encapsulated object can be accessed via the arrow operator.
61 Upon invocation of the arrow operator or dereferencing (`operator*`), the
62 loop on the events and calculations of all scheduled actions are executed
63 if needed.
64 It is possible to iterate on the result proxy if the proxied object is a collection.
65 ~~~{.cpp}
66 for (auto& myItem : myResultProxy) { ... };
67 ~~~
68 If iteration is not supported by the type of the proxied object, a compilation error is thrown.
69 
70 */
71 template <typename T>
72 class RResultPtr {
73  // private using declarations
74  using SPT_t = std::shared_ptr<T>;
75 
76  // friend declarations
77  template <typename T1>
78  friend class RResultPtr;
79 
80  template <typename T1>
81  friend RResultPtr<T1> RDFDetail::MakeResultPtr(const std::shared_ptr<T1> &, ::ROOT::Detail::RDF::RLoopManager &,
82  std::shared_ptr<RDFInternal::RActionBase>);
83  template <class T1, class T2>
84  friend bool operator==(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs);
85  template <class T1, class T2>
86  friend bool operator!=(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs);
87  template <class T1>
88  friend bool operator==(const RResultPtr<T1> &lhs, std::nullptr_t rhs);
89  template <class T1>
90  friend bool operator==(std::nullptr_t lhs, const RResultPtr<T1> &rhs);
91  template <class T1>
92  friend bool operator!=(const RResultPtr<T1> &lhs, std::nullptr_t rhs);
93  template <class T1>
94  friend bool operator!=(std::nullptr_t lhs, const RResultPtr<T1> &rhs);
95 
96  friend class ROOT::Internal::RDF::GraphDrawing::GraphCreatorHelper;
97 
98  /// \cond HIDDEN_SYMBOLS
99  template <typename V, bool hasBeginEnd = TTraits::HasBeginAndEnd<V>::value>
100  struct RIterationHelper {
101  using Iterator_t = void;
102  void GetBegin(const V &) { static_assert(sizeof(V) == 0, "It does not make sense to ask begin for this class."); }
103  void GetEnd(const V &) { static_assert(sizeof(V) == 0, "It does not make sense to ask end for this class."); }
104  };
105 
106  template <typename V>
107  struct RIterationHelper<V, true> {
108  using Iterator_t = decltype(std::begin(std::declval<V>()));
109  static Iterator_t GetBegin(const V &v) { return std::begin(v); };
110  static Iterator_t GetEnd(const V &v) { return std::end(v); };
111  };
112  /// \endcond
113 
114  /// Non-owning pointer to the RLoopManager at the root of this computation graph.
115  /// The RLoopManager is guaranteed to be always in scope if fLoopManager is not a nullptr.
116  RDFDetail::RLoopManager *fLoopManager = nullptr;
117  SPT_t fObjPtr; ///< Shared pointer encapsulating the wrapped result
118  /// Owning pointer to the action that will produce this result.
119  /// Ownership is shared with other copies of this ResultPtr.
120  std::shared_ptr<RDFInternal::RActionBase> fActionPtr;
121 
122  /// Triggers the event loop in the RLoopManager
123  void TriggerRun();
124 
125  /// Get the pointer to the encapsulated result.
126  /// Ownership is not transferred to the caller.
127  /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
128  T *Get()
129  {
130  if (!fActionPtr->HasRun())
131  TriggerRun();
132  return fObjPtr.get();
133  }
134 
135  RResultPtr(std::shared_ptr<T> objPtr, RDFDetail::RLoopManager *lm,
136  std::shared_ptr<RDFInternal::RActionBase> actionPtr)
137  : fLoopManager(lm), fObjPtr(std::move(objPtr)), fActionPtr(std::move(actionPtr))
138  {
139  }
140 
141 public:
142  using Value_t = T; ///< Convenience alias to simplify access to proxied type
143  static constexpr ULong64_t kOnce = 0ull; ///< Convenience definition to express a callback must be executed once
144 
145  RResultPtr() = default;
146  RResultPtr(const RResultPtr &) = default;
147  RResultPtr(RResultPtr &&) = default;
148  RResultPtr &operator=(const RResultPtr &) = default;
149  RResultPtr &operator=(RResultPtr &&) = default;
150  explicit operator bool() const { return bool(fObjPtr); }
151  template<typename TO, typename std::enable_if<std::is_convertible<T, TO>::value, int>::type = 0 >
152  operator RResultPtr<TO>() const
153  {
154  RResultPtr<TO> rp;
155  rp.fLoopManager = fLoopManager;
156  rp.fObjPtr = fObjPtr;
157  rp.fActionPtr = fActionPtr;
158  return rp;
159  }
160 
161  /// Get a const reference to the encapsulated object.
162  /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
163  const T &GetValue() { return *Get(); }
164 
165  /// Get the pointer to the encapsulated object.
166  /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
167  T *GetPtr() { return Get(); }
168 
169  /// Get a pointer to the encapsulated object.
170  /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
171  T &operator*() { return *Get(); }
172 
173  /// Get a pointer to the encapsulated object.
174  /// Ownership is not transferred to the caller.
175  /// Triggers event loop and execution of all actions booked in the associated RLoopManager.
176  T *operator->() { return Get(); }
177 
178  /// Return an iterator to the beginning of the contained object if this makes
179  /// sense, throw a compilation error otherwise
180  typename RIterationHelper<T>::Iterator_t begin()
181  {
182  if (!fActionPtr->HasRun())
183  TriggerRun();
184  return RIterationHelper<T>::GetBegin(*fObjPtr);
185  }
186 
187  /// Return an iterator to the end of the contained object if this makes
188  /// sense, throw a compilation error otherwise
189  typename RIterationHelper<T>::Iterator_t end()
190  {
191  if (!fActionPtr->HasRun())
192  TriggerRun();
193  return RIterationHelper<T>::GetEnd(*fObjPtr);
194  }
195 
196  // clang-format off
197  /// Register a callback that RDataFrame will execute "everyNEvents" on a partial result.
198  ///
199  /// \param[in] everyNEvents Frequency at which the callback will be called, as a number of events processed
200  /// \param[in] callback a callable with signature `void(Value_t&)` where Value_t is the type of the value contained in this RResultPtr
201  /// \return this RResultPtr, to allow chaining of OnPartialResultSlot with other calls
202  ///
203  /// The callback must be a callable (lambda, function, functor class...) that takes a reference to the result type as
204  /// argument and returns nothing. RDataFrame will invoke registered callbacks passing partial action results as
205  /// arguments to them (e.g. a histogram filled with a part of the selected events, a counter incremented only up to a
206  /// certain point, a mean over a subset of the events and so forth).
207  ///
208  /// Callbacks can be used e.g. to inspect partial results of the analysis while the event loop is running. For
209  /// example one can draw an up-to-date version of a result histogram every 100 entries like this:
210  /// \code{.cpp}
211  /// auto h = tdf.Histo1D("x");
212  /// TCanvas c("c","x hist");
213  /// h.OnPartialResult(100, [&c](TH1D &h_) { c.cd(); h_.Draw(); c.Update(); });
214  /// h->Draw(); // event loop runs here, this `Draw` is executed after the event loop is finished
215  /// \endcode
216  ///
217  /// A value of 0 for everyNEvents indicates the callback must be executed only once, before running the event loop.
218  /// A conveniece definition `kOnce` is provided to make this fact more expressive in user code (see snippet below).
219  /// Multiple callbacks can be registered with the same RResultPtr (i.e. results of RDataFrame actions) and will
220  /// be executed sequentially. Callbacks are executed in the order they were registered.
221  /// The type of the value contained in a RResultPtr is also available as RResultPtr<T>::Value_t, e.g.
222  /// \code{.cpp}
223  /// auto h = tdf.Histo1D("x");
224  /// // h.kOnce is 0
225  /// // decltype(h)::Value_t is TH1D
226  /// \endcode
227  ///
228  /// When implicit multi-threading is enabled, the callback:
229  /// - will never be executed by multiple threads concurrently: it needs not be thread-safe. For example the snippet
230  /// above that draws the partial histogram on a canvas works seamlessly in multi-thread event loops.
231  /// - will always be executed "everyNEvents": partial results will "contain" that number of events more from
232  /// one call to the next
233  /// - might be executed by a different worker thread at different times: the value of `std::this_thread::get_id()`
234  /// might change between calls
235  ///
236  /// To register a callback that is called by _each_ worker thread (concurrently) every N events one can use
237  /// OnPartialResultSlot().
238  // clang-format on
239  RResultPtr<T> &OnPartialResult(ULong64_t everyNEvents, std::function<void(T &)> callback)
240  {
241  const auto nSlots = fLoopManager->GetNSlots();
242  auto actionPtr = fActionPtr;
243  auto c = [nSlots, actionPtr, callback](unsigned int slot) {
244  if (slot != nSlots - 1)
245  return;
246  auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot));
247  callback(*partialResult);
248  };
249  fLoopManager->RegisterCallback(everyNEvents, std::move(c));
250  return *this;
251  }
252 
253  // clang-format off
254  /// Register a callback that RDataFrame will execute in each worker thread concurrently on that thread's partial result.
255  ///
256  /// \param[in] everyNEvents Frequency at which the callback will be called by each thread, as a number of events processed
257  /// \param[in] a callable with signature `void(unsigned int, Value_t&)` where Value_t is the type of the value contained in this RResultPtr
258  /// \return this RResultPtr, to allow chaining of OnPartialResultSlot with other calls
259  ///
260  /// See `OnPartialResult` for a generic explanation of the callback mechanism.
261  /// Compared to `OnPartialResult`, this method has two major differences:
262  /// - all worker threads invoke the callback once every specified number of events. The event count is per-thread,
263  /// and callback invocation might happen concurrently (i.e. the callback must be thread-safe)
264  /// - the callable must take an extra `unsigned int` parameter corresponding to a multi-thread "processing slot":
265  /// this is a "helper value" to simplify writing thread-safe callbacks: different worker threads might invoke the
266  /// callback concurrently but always with different `slot` numbers.
267  /// - a value of 0 for everyNEvents indicates the callback must be executed once _per slot_.
268  ///
269  /// For example, the following snippet prints out a thread-safe progress bar of the events processed by RDataFrame
270  /// \code
271  /// auto c = tdf.Count(); // any action would do, but `Count` is the most lightweight
272  /// std::string progress;
273  /// std::mutex bar_mutex;
274  /// c.OnPartialResultSlot(nEvents / 100, [&progress, &bar_mutex](unsigned int, ULong64_t &) {
275  /// std::lock_guard<std::mutex> lg(bar_mutex);
276  /// progress.push_back('#');
277  /// std::cout << "\r[" << std::left << std::setw(100) << progress << ']' << std::flush;
278  /// });
279  /// std::cout << "Analysis running..." << std::endl;
280  /// *c; // trigger the event loop by accessing an action's result
281  /// std::cout << "\nDone!" << std::endl;
282  /// \endcode
283  // clang-format on
284  RResultPtr<T> &OnPartialResultSlot(ULong64_t everyNEvents, std::function<void(unsigned int, T &)> callback)
285  {
286  auto actionPtr = fActionPtr;
287  auto c = [actionPtr, callback](unsigned int slot) {
288  auto partialResult = static_cast<Value_t *>(actionPtr->PartialUpdate(slot));
289  callback(slot, *partialResult);
290  };
291  fLoopManager->RegisterCallback(everyNEvents, std::move(c));
292  return *this;
293  }
294 };
295 
296 template <typename T>
297 void RResultPtr<T>::TriggerRun()
298 {
299  fLoopManager->Run();
300 }
301 
302 template <class T1, class T2>
303 bool operator==(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs)
304 {
305  return lhs.fObjPtr == rhs.fObjPtr;
306 }
307 
308 template <class T1, class T2>
309 bool operator!=(const RResultPtr<T1> &lhs, const RResultPtr<T2> &rhs)
310 {
311  return lhs.fObjPtr != rhs.fObjPtr;
312 }
313 
314 template <class T1>
315 bool operator==(const RResultPtr<T1> &lhs, std::nullptr_t rhs)
316 {
317  return lhs.fObjPtr == rhs;
318 }
319 
320 template <class T1>
321 bool operator==(std::nullptr_t lhs, const RResultPtr<T1> &rhs)
322 {
323  return lhs == rhs.fObjPtr;
324 }
325 
326 template <class T1>
327 bool operator!=(const RResultPtr<T1> &lhs, std::nullptr_t rhs)
328 {
329  return lhs.fObjPtr != rhs;
330 }
331 
332 template <class T1>
333 bool operator!=(std::nullptr_t lhs, const RResultPtr<T1> &rhs)
334 {
335  return lhs != rhs.fObjPtr;
336 }
337 
338 } // end NS RDF
339 
340 namespace Detail {
341 namespace RDF {
342 /// Create a RResultPtr and set its pointer to the corresponding RAction
343 /// This overload is invoked by non-jitted actions, as they have access to RAction before constructing RResultPtr.
344 template <typename T>
345 RResultPtr<T>
346 MakeResultPtr(const std::shared_ptr<T> &r, RLoopManager &lm, std::shared_ptr<RDFInternal::RActionBase> actionPtr)
347 {
348  return RResultPtr<T>(r, &lm, std::move(actionPtr));
349 }
350 } // end NS RDF
351 } // end NS Detail
352 } // end NS ROOT
353 
354 #endif // ROOT_RRESULTPTR