Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RPageStorage.hxx
Go to the documentation of this file.
1 /// \file ROOT/RPageStorage.hxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2018-07-19
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #ifndef ROOT7_RPageStorage
17 #define ROOT7_RPageStorage
18 
20 #include <ROOT/RNTupleOptions.hxx>
21 #include <ROOT/RNTupleUtil.hxx>
22 #include <ROOT/RPage.hxx>
23 #include <ROOT/RPageAllocator.hxx>
24 #include <ROOT/RStringView.hxx>
25 
26 #include <atomic>
27 #include <cstddef>
28 #include <memory>
29 
30 namespace ROOT {
31 namespace Experimental {
32 
33 class RNTupleModel;
34 // TODO(jblomer): factory methods to create tree sinks and sources outside Detail namespace
35 
36 namespace Detail {
37 
38 class RColumn;
39 class RPagePool;
40 class RFieldBase;
41 class RNTupleMetrics;
42 
43 enum class EPageStorageType { // Tells the write side and the read side of a page storage apart
44  kSink, ///< Reported by RPageSink::GetType()
45  kSource, ///< Reported by RPageSource::GetType()
46 };
47 
48 // clang-format off
49 /**
50 \class ROOT::Experimental::Detail::RPageStorage
51 \ingroup NTuple
52 \brief Common functionality of an ntuple storage for both reading and writing
53 
54 The RPageStorage provides access to a storage container that keeps the bits of pages and clusters comprising
55 an ntuple. Concrete implementations can use a TFile, a raw file, an object store, and so on.
56 */
57 // clang-format on
58 class RPageStorage { // Abstract base of RPageSink and RPageSource
59 protected:
60  std::string fNTupleName; ///< NOTE(review): presumably set from the constructor's `name` argument -- confirm in the .cxx
61 
62 public:
63  explicit RPageStorage(std::string_view name);
64  RPageStorage(const RPageStorage &other) = delete; // copy construction is disabled
65  RPageStorage& operator =(const RPageStorage &other) = delete; // copy assignment is disabled
66  virtual ~RPageStorage();
67 
68  struct RColumnHandle { // Pairs an integer id with a raw pointer to a column; the handle never deletes the column
69  RColumnHandle() : fId(-1), fColumn(nullptr) {} // default handle: id -1 and no column
70  RColumnHandle(int id, const RColumn *column) : fId(id), fColumn(column) {}
71  int fId;
72  const RColumn *fColumn;
73  };
74  /// The column handle identifies a column within the currently open page storage
75  using ColumnHandle_t = RColumnHandle;
76 
77  /// Register a new column. When reading, the column must exist in the ntuple on disk corresponding to the meta-data.
78  /// When writing, every column can only be attached once.
79  virtual ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) = 0;
80  /// Whether the concrete implementation is a sink or a source
81  virtual EPageStorageType GetType() = 0;
82 
83  /// Every page store needs to be able to free pages it handed out. But sinks and sources have different means
84  /// of allocating pages.
85  virtual void ReleasePage(RPage &page) = 0;
86 
87  /// Page storage implementations usually have their own metrics
88  virtual RNTupleMetrics &GetMetrics() = 0;
89 };
90 
91 // clang-format off
92 /**
93 \class ROOT::Experimental::Detail::RPageSink
94 \ingroup NTuple
95 \brief Abstract interface to write data into an ntuple
96 
97 The page sink takes the list of columns and afterwards a series of page commits and cluster commits.
98 The user is responsible for committing clusters at a consistent point, i.e. when all pages corresponding to data
99 up to the given entry number are committed.
100 */
101 // clang-format on
102 class RPageSink : public RPageStorage {
103 protected:
104  const RNTupleWriteOptions fOptions; ///< NOTE(review): presumably copied from the constructor's `options` argument -- confirm in the .cxx
105 
106  /// Building the ntuple descriptor while writing is done in the same way for all the storage sink implementations.
107  /// Field, column, cluster ids and page indexes per cluster are issued sequentially starting with 0
108  DescriptorId_t fLastFieldId = 0;
109  DescriptorId_t fLastColumnId = 0;
110  DescriptorId_t fLastClusterId = 0;
111  NTupleSize_t fPrevClusterNEntries = 0; ///< NOTE(review): presumably the entry count at the previous cluster commit -- confirm in the .cxx
112  /// Keeps track of the number of elements in the currently open cluster. Indexed by column id.
113  std::vector<RClusterDescriptor::RColumnRange> fOpenColumnRanges;
114  /// Keeps track of the written pages in the currently open cluster. Indexed by column id.
115  std::vector<RClusterDescriptor::RPageRange> fOpenPageRanges;
116  RNTupleDescriptorBuilder fDescriptorBuilder;
117 
118  virtual void DoCreate(const RNTupleModel &model) = 0; ///< Storage-specific hook called by Create(RNTupleModel &)
119  virtual RClusterDescriptor::RLocator DoCommitPage(ColumnHandle_t columnHandle, const RPage &page) = 0; ///< NOTE(review): presumably the hook behind CommitPage()
120  virtual RClusterDescriptor::RLocator DoCommitCluster(NTupleSize_t nEntries) = 0; ///< NOTE(review): presumably the hook behind CommitCluster()
121  virtual void DoCommitDataset() = 0; ///< Storage-specific hook called by CommitDataset()
122 
123 public:
124  RPageSink(std::string_view ntupleName, const RNTupleWriteOptions &options);
125  virtual ~RPageSink();
126  /// Guess the concrete derived page sink from the file name (location)
127  static std::unique_ptr<RPageSink> Create(std::string_view ntupleName, std::string_view location,
128  const RNTupleWriteOptions &options = RNTupleWriteOptions());
129  EPageStorageType GetType() final { return EPageStorageType::kSink; }
130 
131  ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
132 
133  /// Physically creates the storage container to hold the ntuple (e.g., a key in a TFile or an S3 bucket).
134  /// To do so, Create() calls DoCreate() after updating the descriptor.
135  /// Create() associates column handles to the columns referenced by the model
136  void Create(RNTupleModel &model);
137  /// Write a page to the storage. The column must have been added before.
138  void CommitPage(ColumnHandle_t columnHandle, const RPage &page);
139  /// Finalize the current cluster and create a new one for the following data.
140  void CommitCluster(NTupleSize_t nEntries);
141  /// Finalize the current cluster and the entire data set.
142  void CommitDataset() { DoCommitDataset(); }
143 
144  /// Get a new, empty page for the given column that can be filled with up to nElements. If nElements is zero,
145  /// the page sink picks an appropriate size.
146  virtual RPage ReservePage(ColumnHandle_t columnHandle, std::size_t nElements = 0) = 0;
147 };
148 
149 // clang-format off
150 /**
151 \class ROOT::Experimental::Detail::RPageSource
152 \ingroup NTuple
153 \brief Abstract interface to read data from an ntuple
154 
155 The page source is initialized with the columns of interest. Pages from those columns can then be
156 mapped into memory. The page source also gives access to the ntuple's meta-data.
157 */
158 // clang-format on
159 class RPageSource : public RPageStorage {
160 protected:
161  const RNTupleReadOptions fOptions; ///< NOTE(review): presumably copied from the constructor's `options` argument -- confirm in the .cxx
162  RNTupleDescriptor fDescriptor; ///< Meta-data of the ntuple; assigned by Attach()
163 
164  virtual RNTupleDescriptor DoAttach() = 0; ///< Storage-specific hook; Attach() stores its result in fDescriptor
165 
166 public:
167  RPageSource(std::string_view ntupleName, const RNTupleReadOptions &options);
168  virtual ~RPageSource();
169  /// Guess the concrete derived page source from the file name (location)
170  static std::unique_ptr<RPageSource> Create(std::string_view ntupleName, std::string_view location,
171  const RNTupleReadOptions &options = RNTupleReadOptions());
172  /// Open the same storage multiple times, e.g. for reading in multiple threads
173  virtual std::unique_ptr<RPageSource> Clone() const = 0;
174 
175  EPageStorageType GetType() final { return EPageStorageType::kSource; }
176  const RNTupleDescriptor &GetDescriptor() const { return fDescriptor; } ///< Descriptor as last assigned by Attach()
177  ColumnHandle_t AddColumn(DescriptorId_t fieldId, const RColumn &column) final;
178 
179  /// Open the physical storage container for the ntuple and take over the descriptor returned by DoAttach()
180  void Attach() { fDescriptor = DoAttach(); }
181  NTupleSize_t GetNEntries();
182  NTupleSize_t GetNElements(ColumnHandle_t columnHandle);
183  ColumnId_t GetColumnId(ColumnHandle_t columnHandle);
184 
185  /// Allocates and fills a page that contains the element with the given global index
186  virtual RPage PopulatePage(ColumnHandle_t columnHandle, NTupleSize_t globalIndex) = 0;
187  /// Another version of PopulatePage that allows specifying cluster-relative indexes
188  virtual RPage PopulatePage(ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex) = 0;
189 };
190 
191 } // namespace Detail
192 
193 } // namespace Experimental
194 } // namespace ROOT
195 
196 #endif