Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RPageStorageRoot.cxx
Go to the documentation of this file.
1 /// \file RPageStorageRoot.cxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2018-10-04
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #include <ROOT/RField.hxx>
18 #include <ROOT/RNTupleModel.hxx>
19 #include <ROOT/RPage.hxx>
20 #include <ROOT/RPageAllocator.hxx>
21 #include <ROOT/RPagePool.hxx>
23 #include <ROOT/RLogger.hxx>
24 
25 #include <TKey.h>
26 
27 #include <cstdlib>
28 #include <iostream>
29 #include <utility>
30 
namespace {

// Names of the keys stored in the ntuple's directory. Page payload keys are built as
// <kKeyPagePayload><cluster id><kKeySeparator><page position>.
constexpr const char *kKeyNTupleHeader = "NTPLH";
constexpr const char *kKeyNTupleFooter = "NTPLF";
constexpr const char *kKeyPagePayload = "NTPLP";
constexpr const char *kKeySeparator = "_";

} // anonymous namespace
39 
40 ROOT::Experimental::Detail::RPageSinkRoot::RPageSinkRoot(std::string_view ntupleName, std::string_view path,
41  const RNTupleWriteOptions &options)
42  : RPageSink(ntupleName, options)
43  , fMetrics("RPageSinkRoot")
44  , fPageAllocator(std::make_unique<RPageAllocatorHeap>())
45 {
46  R__WARNING_HERE("NTuple") << "The RNTuple file format will change. " <<
47  "Do not store real data with this version of RNTuple!";
48  fFile = std::unique_ptr<TFile>(TFile::Open(std::string(path).c_str(), "RECREATE"));
49  fFile->SetCompressionSettings(fOptions.GetCompression());
50 }
51 
52 ROOT::Experimental::Detail::RPageSinkRoot::~RPageSinkRoot()
53 {
54  if (fFile)
55  fFile->Close();
56 }
57 
58 void ROOT::Experimental::Detail::RPageSinkRoot::DoCreate(const RNTupleModel & /* model */)
59 {
60  fDirectory = fFile->mkdir(fNTupleName.c_str());
61 
62  const auto &descriptor = fDescriptorBuilder.GetDescriptor();
63  auto szHeader = descriptor.SerializeHeader(nullptr);
64  auto buffer = new unsigned char[szHeader];
65  descriptor.SerializeHeader(buffer);
66  ROOT::Experimental::Internal::RNTupleBlob blob(szHeader, buffer);
67  fDirectory->WriteObject(&blob, kKeyNTupleHeader);
68  delete[] buffer;
69 }
70 
71 ROOT::Experimental::RClusterDescriptor::RLocator
72 ROOT::Experimental::Detail::RPageSinkRoot::DoCommitPage(ColumnHandle_t columnHandle, const RPage &page)
73 {
74  unsigned char *buffer = reinterpret_cast<unsigned char *>(page.GetBuffer());
75  auto packedBytes = page.GetSize();
76  auto element = columnHandle.fColumn->GetElement();
77  const auto isMappable = element->IsMappable();
78 
79  if (!isMappable) {
80  packedBytes = (page.GetNElements() * element->GetBitsOnStorage() + 7) / 8;
81  buffer = new unsigned char[packedBytes];
82  element->Pack(buffer, page.GetBuffer(), page.GetNElements());
83  }
84 
85  ROOT::Experimental::Internal::RNTupleBlob pagePayload(packedBytes, buffer);
86  std::string keyName = std::string(kKeyPagePayload) +
87  std::to_string(fLastClusterId) + kKeySeparator +
88  std::to_string(fLastPageIdx);
89  fDirectory->WriteObject(&pagePayload, keyName.c_str());
90 
91  if (!isMappable) {
92  delete[] buffer;
93  }
94 
95  RClusterDescriptor::RLocator result;
96  result.fPosition = fLastPageIdx++;
97  result.fBytesOnStorage = packedBytes;
98  return result;
99 }
100 
101 ROOT::Experimental::RClusterDescriptor::RLocator
102 ROOT::Experimental::Detail::RPageSinkRoot::DoCommitCluster(ROOT::Experimental::NTupleSize_t /* nEntries */)
103 {
104  fLastPageIdx = 0;
105  return RClusterDescriptor::RLocator();
106 }
107 
108 void ROOT::Experimental::Detail::RPageSinkRoot::DoCommitDataset()
109 {
110  if (!fDirectory)
111  return;
112 
113  const auto &descriptor = fDescriptorBuilder.GetDescriptor();
114  auto szFooter = descriptor.SerializeFooter(nullptr);
115  auto buffer = new unsigned char[szFooter];
116  descriptor.SerializeFooter(buffer);
117  ROOT::Experimental::Internal::RNTupleBlob footerBlob(szFooter, buffer);
118  fDirectory->WriteObject(&footerBlob, kKeyNTupleFooter);
119  delete[] buffer;
120 }
121 
122 ROOT::Experimental::Detail::RPage
123 ROOT::Experimental::Detail::RPageSinkRoot::ReservePage(ColumnHandle_t columnHandle, std::size_t nElements)
124 {
125  if (nElements == 0)
126  nElements = kDefaultElementsPerPage;
127  auto elementSize = columnHandle.fColumn->GetElement()->GetSize();
128  return fPageAllocator->NewPage(columnHandle.fId, elementSize, nElements);
129 }
130 
131 void ROOT::Experimental::Detail::RPageSinkRoot::ReleasePage(RPage &page)
132 {
133  fPageAllocator->DeletePage(page);
134 }
135 
136 
137 ////////////////////////////////////////////////////////////////////////////////
138 
139 
140 ROOT::Experimental::Detail::RPage ROOT::Experimental::Detail::RPageAllocatorKey::NewPage(
141  ColumnId_t columnId, void *mem, std::size_t elementSize, std::size_t nElements)
142 {
143  RPage newPage(columnId, mem, elementSize * nElements, elementSize);
144  newPage.TryGrow(nElements);
145  return newPage;
146 }
147 
148 void ROOT::Experimental::Detail::RPageAllocatorKey::DeletePage(
149  const RPage& page, ROOT::Experimental::Internal::RNTupleBlob *payload)
150 {
151  if (page.IsNull())
152  return;
153  R__ASSERT(page.GetBuffer() == payload->fContent);
154  free(payload->fContent);
155  delete payload;
156 }
157 
158 
159 ////////////////////////////////////////////////////////////////////////////////
160 
161 
162 ROOT::Experimental::Detail::RPageSourceRoot::RPageSourceRoot(std::string_view ntupleName, std::string_view path,
163  const RNTupleReadOptions &options)
164  : RPageSource(ntupleName, options)
165  , fMetrics("RPageSourceRoot")
166  , fPageAllocator(std::make_unique<RPageAllocatorKey>())
167  , fPagePool(std::make_shared<RPagePool>())
168 {
169  fFile = std::unique_ptr<TFile>(TFile::Open(std::string(path).c_str(), "READ"));
170 }
171 
172 
173 ROOT::Experimental::Detail::RPageSourceRoot::~RPageSourceRoot()
174 {
175  if (fFile)
176  fFile->Close();
177 }
178 
179 
180 ROOT::Experimental::RNTupleDescriptor ROOT::Experimental::Detail::RPageSourceRoot::DoAttach()
181 {
182  fDirectory = fFile->GetDirectory(fNTupleName.c_str());
183  RNTupleDescriptorBuilder descBuilder;
184 
185  auto keyRawNTupleHeader = fDirectory->GetKey(kKeyNTupleHeader);
186  auto ntupleRawHeader = keyRawNTupleHeader->ReadObject<ROOT::Experimental::Internal::RNTupleBlob>();
187  descBuilder.SetFromHeader(ntupleRawHeader->fContent);
188  free(ntupleRawHeader->fContent);
189  delete ntupleRawHeader;
190 
191  auto keyRawNTupleFooter = fDirectory->GetKey(kKeyNTupleFooter);
192  auto ntupleRawFooter = keyRawNTupleFooter->ReadObject<ROOT::Experimental::Internal::RNTupleBlob>();
193  descBuilder.AddClustersFromFooter(ntupleRawFooter->fContent);
194  free(ntupleRawFooter->fContent);
195  delete ntupleRawFooter;
196 
197  return descBuilder.MoveDescriptor();
198 }
199 
200 
201 ROOT::Experimental::Detail::RPage ROOT::Experimental::Detail::RPageSourceRoot::PopulatePageFromCluster(
202  ColumnHandle_t columnHandle, const RClusterDescriptor &clusterDescriptor, ClusterSize_t::ValueType clusterIndex)
203 {
204  auto columnId = columnHandle.fId;
205  auto clusterId = clusterDescriptor.GetId();
206  const auto &pageRange = clusterDescriptor.GetPageRange(columnId);
207 
208  // TODO(jblomer): binary search
209  RClusterDescriptor::RPageRange::RPageInfo pageInfo;
210  decltype(clusterIndex) firstInPage = 0;
211  for (const auto &pi : pageRange.fPageInfos) {
212  if (firstInPage + pi.fNElements > clusterIndex) {
213  pageInfo = pi;
214  break;
215  }
216  firstInPage += pi.fNElements;
217  }
218  R__ASSERT(firstInPage <= clusterIndex);
219  R__ASSERT((firstInPage + pageInfo.fNElements) > clusterIndex);
220 
221  //printf("Populating page %lu/%lu [%lu] for column %d starting at %lu\n", clusterId, pageInCluster, pageIdx, columnId, firstInPage);
222 
223  std::string keyName = std::string(kKeyPagePayload) +
224  std::to_string(clusterId) + kKeySeparator +
225  std::to_string(pageInfo.fLocator.fPosition);
226  auto pageKey = fDirectory->GetKey(keyName.c_str());
227  auto pagePayload = pageKey->ReadObject<ROOT::Experimental::Internal::RNTupleBlob>();
228 
229  unsigned char *buffer = pagePayload->fContent;
230  auto element = columnHandle.fColumn->GetElement();
231  auto elementSize = element->GetSize();
232  if (!element->IsMappable()) {
233  auto pageSize = elementSize * pageInfo.fNElements;
234  buffer = reinterpret_cast<unsigned char *>(malloc(pageSize));
235  R__ASSERT(buffer != nullptr);
236  element->Unpack(buffer, pagePayload->fContent, pageInfo.fNElements);
237  free(pagePayload->fContent);
238  pagePayload->fContent = buffer;
239  pagePayload->fSize = pageSize;
240  }
241 
242  auto indexOffset = clusterDescriptor.GetColumnRange(columnId).fFirstElementIndex;
243  auto newPage = fPageAllocator->NewPage(columnId, pagePayload->fContent, elementSize, pageInfo.fNElements);
244  newPage.SetWindow(indexOffset + firstInPage, RPage::RClusterInfo(clusterId, indexOffset));
245  fPagePool->RegisterPage(newPage,
246  RPageDeleter([](const RPage &page, void *userData)
247  {
248  RPageAllocatorKey::DeletePage(page, reinterpret_cast<ROOT::Experimental::Internal::RNTupleBlob *>(userData));
249  }, pagePayload));
250  return newPage;
251 }
252 
253 
254 ROOT::Experimental::Detail::RPage ROOT::Experimental::Detail::RPageSourceRoot::PopulatePage(
255  ColumnHandle_t columnHandle, NTupleSize_t globalIndex)
256 {
257  auto columnId = columnHandle.fId;
258  auto cachedPage = fPagePool->GetPage(columnId, globalIndex);
259  if (!cachedPage.IsNull())
260  return cachedPage;
261 
262  auto clusterId = fDescriptor.FindClusterId(columnId, globalIndex);
263  R__ASSERT(clusterId != kInvalidDescriptorId);
264  const auto &clusterDescriptor = fDescriptor.GetClusterDescriptor(clusterId);
265  auto selfOffset = clusterDescriptor.GetColumnRange(columnId).fFirstElementIndex;
266  R__ASSERT(selfOffset <= globalIndex);
267  return PopulatePageFromCluster(columnHandle, clusterDescriptor, globalIndex - selfOffset);
268 }
269 
270 
271 ROOT::Experimental::Detail::RPage ROOT::Experimental::Detail::RPageSourceRoot::PopulatePage(
272  ColumnHandle_t columnHandle, const RClusterIndex &clusterIndex)
273 {
274  auto clusterId = clusterIndex.GetClusterId();
275  auto index = clusterIndex.GetIndex();
276  auto columnId = columnHandle.fId;
277  auto cachedPage = fPagePool->GetPage(columnId, clusterIndex);
278  if (!cachedPage.IsNull())
279  return cachedPage;
280 
281  R__ASSERT(clusterId != kInvalidDescriptorId);
282  const auto &clusterDescriptor = fDescriptor.GetClusterDescriptor(clusterId);
283  return PopulatePageFromCluster(columnHandle, clusterDescriptor, index);
284 }
285 
286 void ROOT::Experimental::Detail::RPageSourceRoot::ReleasePage(RPage &page)
287 {
288  fPagePool->ReturnPage(page);
289 }
290 
291 std::unique_ptr<ROOT::Experimental::Detail::RPageSource> ROOT::Experimental::Detail::RPageSourceRoot::Clone() const
292 {
293  return std::make_unique<RPageSourceRoot>(fNTupleName, fFile->GetName(), fOptions);
294 }