Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RNTuple.hxx
Go to the documentation of this file.
1 /// \file ROOT/RNTuple.hxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2018-10-04
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #ifndef ROOT7_RNTuple
17 #define ROOT7_RNTuple
18 
#include <ROOT/RNTupleMetrics.hxx>
#include <ROOT/RNTupleModel.hxx>
#include <ROOT/RNTupleOptions.hxx>
#include <ROOT/RNTupleUtil.hxx>
#include <ROOT/RNTupleView.hxx>
#include <ROOT/RPageStorage.hxx>
#include <ROOT/RStringView.hxx>

#include <cstddef>
#include <iostream>
#include <iterator>
#include <memory>
#include <sstream>
#include <utility>
31 
32 namespace ROOT {
33 namespace Experimental {
34 
35 class REntry;
36 class RNTupleModel;
37 
38 namespace Detail {
39 class RPageSink;
40 class RPageSource;
41 }
42 
43 namespace Detail {
44 
45 // clang-format off
46 /**
47 \class ROOT::Experimental::RNTuple
48 \ingroup NTuple
49 \brief The RNTuple represents a live dataset, whose structure is defined by an RNTupleModel
50 
51 RNTuple connects the static information of the RNTupleModel to a source or sink on physical storage.
52 Reading and writing requires use of the corresponding derived class RNTupleReader or RNTupleWriter.
53 RNTuple writes only complete entries (rows of the data set). The entry itself is not kept within the
54 RNTuple, which allows for multiple concurrent entries for the same RNTuple. Besides reading an entire entry,
55 the RNTuple can expose views that read only specific fields.
56 */
57 // clang-format on
58 class RNTuple {
59 protected:
60  std::unique_ptr<RNTupleModel> fModel;
61  /// The number of entries is constant for reading and reflects the sum of Fill() operations when writing
62  NTupleSize_t fNEntries;
63 
64  /// Only the derived RNTupleReader and RNTupleWriter can be instantiated
65  explicit RNTuple(std::unique_ptr<RNTupleModel> model);
66 
67 public:
68  RNTuple(const RNTuple&) = delete;
69  RNTuple& operator =(const RNTuple&) = delete;
70  ~RNTuple();
71 
72  RNTupleModel* GetModel() { return fModel.get(); }
73 }; // RNTuple
74 
75 } // namespace Detail
76 
77 
/**
 * Listing of the different kinds of information that can be requested from RNTupleReader::PrintInfo()
 */
enum class ENTupleInfo {
   /// The ntuple name, description, number of entries
   kSummary,
   /// Size on storage, page sizes, compression factor, etc.
   kStorageDetails,
   /// Internal performance counters, requires that EnableMetrics() was called
   kMetrics,
};
86 
87 
88 // clang-format off
89 /**
90 \class ROOT::Experimental::RNTupleReader
91 \ingroup NTuple
92 \brief An RNTuple that is used to read data from storage
93 
94 An input ntuple provides data from storage as C++ objects. The ntuple model can be created from the data on storage
95 or it can be imposed by the user. The latter case allows users to read into a specialized ntuple model that covers
96 only a subset of the fields in the ntuple. The ntuple model is used when reading complete entries.
Individual fields can be read as well by instantiating a view (see GetView()).
98 */
99 // clang-format on
100 class RNTupleReader : public Detail::RNTuple {
101 private:
102  std::unique_ptr<Detail::RPageSource> fSource;
103  Detail::RNTupleMetrics fMetrics;
104 
105  void ConnectModel();
106 
107 public:
108  // Browse through the entries
109  class RIterator : public std::iterator<std::forward_iterator_tag, NTupleSize_t> {
110  private:
111  using iterator = RIterator;
112  NTupleSize_t fIndex = kInvalidNTupleIndex;
113  public:
114  RIterator() = default;
115  explicit RIterator(NTupleSize_t index) : fIndex(index) {}
116  ~RIterator() = default;
117 
118  iterator operator++(int) /* postfix */ { auto r = *this; fIndex++; return r; }
119  iterator& operator++() /* prefix */ { ++fIndex; return *this; }
120  reference operator* () { return fIndex; }
121  pointer operator->() { return &fIndex; }
122  bool operator==(const iterator& rh) const { return fIndex == rh.fIndex; }
123  bool operator!=(const iterator& rh) const { return fIndex != rh.fIndex; }
124  };
125 
126 
127  static std::unique_ptr<RNTupleReader> Open(std::unique_ptr<RNTupleModel> model,
128  std::string_view ntupleName,
129  std::string_view storage);
130  static std::unique_ptr<RNTupleReader> Open(std::string_view ntupleName, std::string_view storage);
131 
132  /// The user imposes an ntuple model, which must be compatible with the model found in the data on storage
133  RNTupleReader(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Detail::RPageSource> source);
134  /// The model is generated from the ntuple metadata on storage
135  explicit RNTupleReader(std::unique_ptr<Detail::RPageSource> source);
136  std::unique_ptr<RNTupleReader> Clone() { return std::make_unique<RNTupleReader>(fSource->Clone()); }
137  ~RNTupleReader();
138 
139  NTupleSize_t GetNEntries() const { return fNEntries; }
140  const RNTupleDescriptor &GetDescriptor() const { return fSource->GetDescriptor(); }
141 
142  /// Prints a detailed summary of the ntuple, including a list of fields.
143  void PrintInfo(const ENTupleInfo what = ENTupleInfo::kSummary, std::ostream &output = std::cout);
144 
145  /// Analogous to Fill(), fills the default entry of the model. Returns false at the end of the ntuple.
146  /// On I/O errors, raises an expection.
147  void LoadEntry(NTupleSize_t index) { LoadEntry(index, fModel->GetDefaultEntry()); }
148  /// Fills a user provided entry after checking that the entry has been instantiated from the ntuple model
149  void LoadEntry(NTupleSize_t index, REntry* entry) {
150  for (auto& value : *entry) {
151  value.GetField()->Read(index, &value);
152  }
153  }
154 
155  RNTupleGlobalRange GetViewRange() { return RNTupleGlobalRange(0, fNEntries); }
156 
157  /// Provides access to an individual field that can contain either a scalar value or a collection, e.g.
158  /// GetView<double>("particles.pt") or GetView<std::vector<double>>("particle"). It can as well be the index
159  /// field of a collection itself, like GetView<NTupleSize_t>("particle")
160  template <typename T>
161  RNTupleView<T> GetView(std::string_view fieldName) {
162  auto fieldId = fSource->GetDescriptor().FindFieldId(fieldName);
163  return RNTupleView<T>(fieldId, fSource.get());
164  }
165  RNTupleViewCollection GetViewCollection(std::string_view fieldName) {
166  auto fieldId = fSource->GetDescriptor().FindFieldId(fieldName);
167  return RNTupleViewCollection(fieldId, fSource.get());
168  }
169 
170  RIterator begin() { return RIterator(0); }
171  RIterator end() { return RIterator(fNEntries); }
172 
173  void EnableMetrics() { fMetrics.Enable(); }
174 };
175 
176 // clang-format off
177 /**
178 \class ROOT::Experimental::RNTupleWriter
179 \ingroup NTuple
180 \brief An RNTuple that gets filled with entries (data) and writes them to storage
181 
182 An output ntuple can be filled with entries. The caller has to make sure that the data that gets filled into an ntuple
183 is not modified for the time of the Fill() call. The fill call serializes the C++ object into the column format and
184 writes data into the corresponding column page buffers. Writing of the buffers to storage is deferred and can be
185 triggered by Flush() or by destructing the ntuple. On I/O errors, an exception is thrown.
186 */
187 // clang-format on
188 class RNTupleWriter : public Detail::RNTuple {
189 private:
190  static constexpr NTupleSize_t kDefaultClusterSizeEntries = 64000;
191  std::unique_ptr<Detail::RPageSink> fSink;
192  NTupleSize_t fClusterSizeEntries;
193  NTupleSize_t fLastCommitted;
194 
195 public:
196  static std::unique_ptr<RNTupleWriter> Recreate(std::unique_ptr<RNTupleModel> model,
197  std::string_view ntupleName,
198  std::string_view storage,
199  const RNTupleWriteOptions &options = RNTupleWriteOptions());
200  RNTupleWriter(std::unique_ptr<RNTupleModel> model, std::unique_ptr<Detail::RPageSink> sink);
201  RNTupleWriter(const RNTupleWriter&) = delete;
202  RNTupleWriter& operator=(const RNTupleWriter&) = delete;
203  ~RNTupleWriter();
204 
205  /// The simplest user interface if the default entry that comes with the ntuple model is used
206  void Fill() { Fill(fModel->GetDefaultEntry()); }
207  /// Multiple entries can have been instantiated from the tnuple model. This method will perform
208  /// a light check whether the entry comes from the ntuple's own model
209  void Fill(REntry *entry) {
210  for (auto& value : *entry) {
211  value.GetField()->Append(value);
212  }
213  fNEntries++;
214  if ((fNEntries % fClusterSizeEntries) == 0)
215  CommitCluster();
216  }
217  /// Ensure that the data from the so far seen Fill calls has been written to storage
218  void CommitCluster();
219 };
220 
221 // clang-format off
222 /**
223 \class ROOT::Experimental::RCollectionNTuple
224 \ingroup NTuple
225 \brief A virtual ntuple for collections that can be used to some extent like a real ntuple
226 *
227 * This class is between a field and a ntuple. It carries the offset column for the collection and the default entry
228 * taken from the collection model. It does not, however, have a tree model because the collection model has been merged
229 * into the larger ntuple model.
230 */
231 // clang-format on
232 class RCollectionNTuple {
233 private:
234  ClusterSize_t fOffset;
235  std::unique_ptr<REntry> fDefaultEntry;
236 public:
237  explicit RCollectionNTuple(std::unique_ptr<REntry> defaultEntry);
238  RCollectionNTuple(const RCollectionNTuple&) = delete;
239  RCollectionNTuple& operator=(const RCollectionNTuple&) = delete;
240  ~RCollectionNTuple() = default;
241 
242  void Fill() { Fill(fDefaultEntry.get()); }
243  void Fill(REntry *entry) {
244  for (auto& treeValue : *entry) {
245  treeValue.GetField()->Append(treeValue);
246  }
247  fOffset++;
248  }
249 
250  ClusterSize_t* GetOffsetPtr() { return &fOffset; }
251 };
252 
253 } // namespace Experimental
254 } // namespace ROOT
255 
256 #endif