Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RNTupleDescriptorFmt.cxx
Go to the documentation of this file.
1 /// \file RNTupleDescriptorFmt.cxx
2 /// \ingroup NTuple ROOT7
3 /// \author Jakob Blomer <jblomer@cern.ch>
4 /// \date 2019-08-25
5 /// \warning This is part of the ROOT 7 prototype! It will change without notice. It might trigger earthquakes. Feedback
6 /// is welcome!
7 
8 /*************************************************************************
9  * Copyright (C) 1995-2019, Rene Brun and Fons Rademakers. *
10  * All rights reserved. *
11  * *
12  * For the licensing terms see $ROOTSYS/LICENSE. *
13  * For the list of contributors see $ROOTSYS/README/CREDITS. *
14  *************************************************************************/
15 
16 #include <ROOT/RColumnElement.hxx>
17 #include <ROOT/RColumnModel.hxx>
19 #include <ROOT/RNTupleUtil.hxx>
20 
21 #include <algorithm>
22 #include <iomanip>
23 #include <ostream>
24 #include <unordered_map>
25 #include <vector>
26 
27 namespace {
28 
/// Per-cluster summary collected by RNTupleDescriptor::PrintInfo().
///
/// All counters are 64 bit wide: the byte and page counters are accumulated over every page of every
/// column in the cluster, and the entry count is copied from the cluster descriptor, so 32 bit fields
/// could silently wrap around. This also matches the counter widths used by ColumnInfo.
struct ClusterInfo {
   /// Index of the first entry of the cluster; the only member that takes part in comparisons
   std::uint64_t fFirstEntry = 0;
   /// Number of pages in the cluster, summed over all columns
   std::uint64_t fNPages = 0;
   /// Number of entries in the cluster
   std::uint64_t fNEntries = 0;
   /// Sum of the on-storage sizes of the cluster's pages, in bytes
   std::uint64_t fBytesOnStorage = 0;
   /// Sum of (number of elements * element size) over the cluster's pages, in bytes
   std::uint64_t fBytesInMemory = 0;

   /// Two cluster summaries are considered equal if they start at the same entry.
   bool operator==(const ClusterInfo &other) const { return fFirstEntry == other.fFirstEntry; }

   /// Orders cluster summaries by their first entry, so that sorting yields entry order.
   bool operator<(const ClusterInfo &other) const { return fFirstEntry < other.fFirstEntry; }
};
44 
45 struct ColumnInfo {
46  ROOT::Experimental::DescriptorId_t fFieldId = 0;
47  std::uint64_t fLocalOrder = 0;
48  std::uint64_t fNElements = 0;
49  std::uint64_t fNPages = 0;
50  std::uint64_t fBytesOnStorage = 0;
51  std::uint32_t fElementSize = 0;
52  ROOT::Experimental::EColumnType fType;
53  std::string fFieldName;
54 
55  bool operator <(const ColumnInfo &other) const {
56  if (fFieldName == other.fFieldName)
57  return fLocalOrder < other.fLocalOrder;
58  return fFieldName < other.fFieldName;
59  }
60 };
61 
62 static std::string GetFieldName(ROOT::Experimental::DescriptorId_t fieldId,
63  const ROOT::Experimental::RNTupleDescriptor &ntupleDesc)
64 {
65  const auto &fieldDesc = ntupleDesc.GetFieldDescriptor(fieldId);
66  if (fieldDesc.GetParentId() == ROOT::Experimental::kInvalidDescriptorId)
67  return fieldDesc.GetFieldName();
68  return GetFieldName(fieldDesc.GetParentId(), ntupleDesc) + "." + fieldDesc.GetFieldName();
69 }
70 
71 static std::string GetColumnTypeName(ROOT::Experimental::EColumnType type)
72 {
73  switch (type) {
74  case ROOT::Experimental::EColumnType::kBit:
75  return "Bit";
76  case ROOT::Experimental::EColumnType::kByte:
77  return "Byte";
78  case ROOT::Experimental::EColumnType::kInt32:
79  return "Int32";
80  case ROOT::Experimental::EColumnType::kInt64:
81  return "Int64";
82  case ROOT::Experimental::EColumnType::kReal32:
83  return "Real32";
84  case ROOT::Experimental::EColumnType::kReal64:
85  return "Real64";
86  case ROOT::Experimental::EColumnType::kIndex:
87  return "Index";
88  case ROOT::Experimental::EColumnType::kSwitch:
89  return "Switch";
90  default:
91  return "UNKNOWN";
92  }
93 }
94 
95 } // anonymous namespace
96 
97 void ROOT::Experimental::RNTupleDescriptor::PrintInfo(std::ostream &output) const
98 {
99  std::vector<ColumnInfo> columns;
100  std::vector<ClusterInfo> clusters;
101  std::unordered_map<DescriptorId_t, unsigned int> cluster2Idx;
102  for (const auto &cluster : fClusterDescriptors) {
103  ClusterInfo info;
104  info.fFirstEntry = cluster.second.GetFirstEntryIndex();
105  info.fNEntries = cluster.second.GetNEntries();
106  cluster2Idx[cluster.first] = clusters.size();
107  clusters.emplace_back(info);
108  }
109 
110  std::uint64_t bytesOnStorage = 0;
111  std::uint64_t bytesInMemory = 0;
112  std::uint64_t nPages = 0;
113  int compression = -1;
114  for (const auto &column : fColumnDescriptors) {
115  auto element = Detail::RColumnElementBase::Generate(column.second.GetModel().GetType());
116  auto elementSize = element.GetSize();
117 
118  ColumnInfo info;
119  info.fFieldId = column.second.GetFieldId();
120  info.fLocalOrder = column.second.GetIndex();
121  info.fElementSize = elementSize;
122  info.fType = column.second.GetModel().GetType();
123 
124  for (const auto &cluster : fClusterDescriptors) {
125  auto columnRange = cluster.second.GetColumnRange(column.first);
126  info.fNElements += columnRange.fNElements;
127  if (compression == -1) {
128  compression = columnRange.fCompressionSettings;
129  }
130  const auto &pageRange = cluster.second.GetPageRange(column.first);
131  auto idx = cluster2Idx[cluster.first];
132  for (const auto &page : pageRange.fPageInfos) {
133  bytesOnStorage += page.fLocator.fBytesOnStorage;
134  bytesInMemory += page.fNElements * elementSize;
135  clusters[idx].fBytesOnStorage += page.fLocator.fBytesOnStorage;
136  clusters[idx].fBytesInMemory += page.fNElements * elementSize;
137  ++clusters[idx].fNPages;
138  info.fBytesOnStorage += page.fLocator.fBytesOnStorage;
139  ++info.fNPages;
140  ++nPages;
141  }
142  }
143  columns.emplace_back(info);
144  }
145  auto headerSize = SerializeHeader(nullptr);
146  auto footerSize = SerializeFooter(nullptr);
147  output << "============================================================" << std::endl;
148  output << "NTUPLE: " << GetName() << std::endl;
149  output << "Compression: " << compression << std::endl;
150  output << "------------------------------------------------------------" << std::endl;
151  output << " # Entries: " << GetNEntries() << std::endl;
152  output << " # Fields: " << GetNFields() << std::endl;
153  output << " # Columns: " << GetNColumns() << std::endl;
154  output << " # Pages: " << nPages << std::endl;
155  output << " # Clusters: " << GetNClusters() << std::endl;
156  output << " Size on storage: " << bytesOnStorage << " B" << std::endl;
157  output << " Compression rate: " << std::fixed << std::setprecision(2)
158  << float(bytesInMemory) / float(bytesOnStorage) << std::endl;
159  output << " Header size: " << headerSize << " B" << std::endl;
160  output << " Footer size: " << footerSize << " B" << std::endl;
161  output << " Meta-data / data: " << std::fixed << std::setprecision(3)
162  << float(headerSize + footerSize) / float(bytesOnStorage) << std::endl;
163  output << "------------------------------------------------------------" << std::endl;
164  output << "CLUSTER DETAILS" << std::endl;
165  output << "------------------------------------------------------------" << std::endl;
166 
167  std::sort(clusters.begin(), clusters.end());
168  for (unsigned int i = 0; i < clusters.size(); ++i) {
169  output << " # " << std::setw(5) << i
170  << " Entry range: [" << clusters[i].fFirstEntry << ".."
171  << clusters[i].fFirstEntry + clusters[i].fNEntries << ") -- " << clusters[i].fNEntries << std::endl;
172  output << " "
173  << " # Pages: " << clusters[i].fNPages << std::endl;
174  output << " "
175  << " Size on storage: " << clusters[i].fBytesOnStorage << " B" << std::endl;
176  output << " "
177  << " Compression: " << std::fixed << std::setprecision(2)
178  << float(clusters[i].fBytesInMemory) / float(float(clusters[i].fBytesOnStorage)) << std::endl;
179  }
180 
181  output << "------------------------------------------------------------" << std::endl;
182  output << "COLUMN DETAILS" << std::endl;
183  output << "------------------------------------------------------------" << std::endl;
184  for (auto &col : columns)
185  col.fFieldName = GetFieldName(col.fFieldId, *this).substr(1);
186  std::sort(columns.begin(), columns.end());
187  for (const auto &col : columns) {
188  auto avgPageSize = (col.fNPages == 0) ? 0 : (col.fBytesOnStorage / col.fNPages);
189  auto avgElementsPerPage = (col.fNPages == 0) ? 0 : (col.fNElements / col.fNPages);
190  output << " " << col.fFieldName << " [#" << col.fLocalOrder << "]" << " -- "
191  << GetColumnTypeName(col.fType) << std::endl;
192  output << " # Elements: " << col.fNElements << std::endl;
193  output << " # Pages: " << col.fNPages << std::endl;
194  output << " Avg elements / page: " << avgElementsPerPage << std::endl;
195  output << " Avg page size: " << avgPageSize << " B" << std::endl;
196  output << " Size on storage: " << col.fBytesOnStorage << " B" << std::endl;
197  output << " Compression: " << std::fixed << std::setprecision(2)
198  << float(col.fElementSize * col.fNElements) / float(col.fBytesOnStorage) << std::endl;
199  output << "............................................................" << std::endl;
200  }
201 }