24 #include <unordered_map>
/// Per-cluster summary collected while printing an ntuple descriptor.
///
/// The byte counters are 64-bit: they are summed over every page of every
/// column that belongs to the cluster, and a 32-bit accumulator would wrap
/// silently once a cluster exceeds 4 GiB.  This also matches the 64-bit
/// accumulators used for the per-column and global totals.
struct ClusterInfo {
   std::uint64_t fFirstEntry = 0;      ///< Index of the first entry in the cluster
   std::uint32_t fNPages = 0;          ///< Number of pages counted for the cluster
   std::uint32_t fNEntries = 0;        ///< Number of entries in the cluster
   std::uint64_t fBytesOnStorage = 0;  ///< Sum of the on-storage sizes of the cluster's pages
   std::uint64_t fBytesInMemory = 0;   ///< Sum of (elements * element size) over the cluster's pages

   /// Two summaries compare equal when they start at the same entry.
   bool operator==(const ClusterInfo &other) const { return fFirstEntry == other.fFirstEntry; }

   /// Orders summaries by their first entry, so that sorting yields entry order.
   bool operator<(const ClusterInfo &other) const { return fFirstEntry < other.fFirstEntry; }
};
46 ROOT::Experimental::DescriptorId_t fFieldId = 0;
47 std::uint64_t fLocalOrder = 0;
48 std::uint64_t fNElements = 0;
49 std::uint64_t fNPages = 0;
50 std::uint64_t fBytesOnStorage = 0;
51 std::uint32_t fElementSize = 0;
52 ROOT::Experimental::EColumnType fType;
53 std::string fFieldName;
55 bool operator <(
const ColumnInfo &other)
const {
56 if (fFieldName == other.fFieldName)
57 return fLocalOrder < other.fLocalOrder;
58 return fFieldName < other.fFieldName;
62 static std::string GetFieldName(ROOT::Experimental::DescriptorId_t fieldId,
63 const ROOT::Experimental::RNTupleDescriptor &ntupleDesc)
65 const auto &fieldDesc = ntupleDesc.GetFieldDescriptor(fieldId);
66 if (fieldDesc.GetParentId() == ROOT::Experimental::kInvalidDescriptorId)
67 return fieldDesc.GetFieldName();
68 return GetFieldName(fieldDesc.GetParentId(), ntupleDesc) +
"." + fieldDesc.GetFieldName();
// Returns a std::string for the given column type; PrintInfo prints the result
// next to each column, so it is presumably the human-readable type name.
// NOTE(review): only the signature and the case labels are visible in this
// listing. The switch header, the value returned for each label and any
// default/fallback branch are elided, so they are not described here --
// confirm against the full file.
71 static std::string GetColumnTypeName(ROOT::Experimental::EColumnType type)
// One case label per handled column type.
74 case ROOT::Experimental::EColumnType::kBit:
76 case ROOT::Experimental::EColumnType::kByte:
78 case ROOT::Experimental::EColumnType::kInt32:
80 case ROOT::Experimental::EColumnType::kInt64:
82 case ROOT::Experimental::EColumnType::kReal32:
84 case ROOT::Experimental::EColumnType::kReal64:
86 case ROOT::Experimental::EColumnType::kIndex:
88 case ROOT::Experimental::EColumnType::kSwitch:
// Writes a textual summary of this ntuple descriptor to `output`: a header with
// overall totals, then one section per cluster, then one section per column.
// NOTE(review): this listing elides some original lines (for example the
// declarations of the `info` locals, and no increment of `nPages` or
// `info.fNPages` is visible) -- confirm against the full file before relying
// on the comments below for those parts.
97 void ROOT::Experimental::RNTupleDescriptor::PrintInfo(std::ostream &output)
const
// Per-column and per-cluster summaries; cluster2Idx maps a cluster id to the
// position of its summary inside `clusters`.
99 std::vector<ColumnInfo> columns;
100 std::vector<ClusterInfo> clusters;
101 std::unordered_map<DescriptorId_t, unsigned int> cluster2Idx;
// First pass: record the entry range of every cluster and remember its index.
102 for (
const auto &cluster : fClusterDescriptors) {
104 info.fFirstEntry = cluster.second.GetFirstEntryIndex();
105 info.fNEntries = cluster.second.GetNEntries();
106 cluster2Idx[cluster.first] = clusters.size();
107 clusters.emplace_back(info);
// Totals accumulated over all columns and clusters.
110 std::uint64_t bytesOnStorage = 0;
111 std::uint64_t bytesInMemory = 0;
112 std::uint64_t nPages = 0;
// -1 marks "no compression setting seen yet".
113 int compression = -1;
// Second pass: for every column, walk all clusters and their pages.
114 for (
const auto &column : fColumnDescriptors) {
// The element generated for the column's type supplies the per-element size
// that is used for the in-memory byte counts below.
115 auto element = Detail::RColumnElementBase::Generate(column.second.GetModel().GetType());
116 auto elementSize = element.GetSize();
119 info.fFieldId = column.second.GetFieldId();
120 info.fLocalOrder = column.second.GetIndex();
121 info.fElementSize = elementSize;
122 info.fType = column.second.GetModel().GetType();
124 for (
const auto &cluster : fClusterDescriptors) {
125 auto columnRange = cluster.second.GetColumnRange(column.first);
126 info.fNElements += columnRange.fNElements;
// Only the first column range visited determines the reported compression.
127 if (compression == -1) {
128 compression = columnRange.fCompressionSettings;
130 const auto &pageRange = cluster.second.GetPageRange(column.first);
// Every cluster id was registered in cluster2Idx by the first pass.
131 auto idx = cluster2Idx[cluster.first];
// Each page contributes to the global, per-cluster and per-column counters.
132 for (
const auto &page : pageRange.fPageInfos) {
133 bytesOnStorage += page.fLocator.fBytesOnStorage;
134 bytesInMemory += page.fNElements * elementSize;
135 clusters[idx].fBytesOnStorage += page.fLocator.fBytesOnStorage;
136 clusters[idx].fBytesInMemory += page.fNElements * elementSize;
137 ++clusters[idx].fNPages;
138 info.fBytesOnStorage += page.fLocator.fBytesOnStorage;
143 columns.emplace_back(info);
// Header and footer sizes come from the serializers called with a null buffer.
// NOTE(review): presumably a null buffer means "only compute the size" -- confirm.
145 auto headerSize = SerializeHeader(
nullptr);
146 auto footerSize = SerializeFooter(
nullptr);
// ---- Overall summary ----
147 output <<
"============================================================" << std::endl;
148 output <<
"NTUPLE: " << GetName() << std::endl;
149 output <<
"Compression: " << compression << std::endl;
150 output <<
"------------------------------------------------------------" << std::endl;
151 output <<
" # Entries: " << GetNEntries() << std::endl;
152 output <<
" # Fields: " << GetNFields() << std::endl;
153 output <<
" # Columns: " << GetNColumns() << std::endl;
154 output <<
" # Pages: " << nPages << std::endl;
155 output <<
" # Clusters: " << GetNClusters() << std::endl;
156 output <<
" Size on storage: " << bytesOnStorage <<
" B" << std::endl;
// NOTE(review): the ratio is computed in float and divides by bytesOnStorage
// without a zero check; std::fixed / std::setprecision stay set on `output`
// after this statement.
157 output <<
" Compression rate: " << std::fixed << std::setprecision(2)
158 << float(bytesInMemory) / float(bytesOnStorage) << std::endl;
159 output <<
" Header size: " << headerSize <<
" B" << std::endl;
160 output <<
" Footer size: " << footerSize <<
" B" << std::endl;
// NOTE(review): same unchecked division by bytesOnStorage as above.
161 output <<
" Meta-data / data: " << std::fixed << std::setprecision(3)
162 << float(headerSize + footerSize) / float(bytesOnStorage) << std::endl;
// ---- Per-cluster details ----
163 output <<
"------------------------------------------------------------" << std::endl;
164 output <<
"CLUSTER DETAILS" << std::endl;
165 output <<
"------------------------------------------------------------" << std::endl;
// Clusters are printed in order of their first entry (ClusterInfo::operator<).
167 std::sort(clusters.begin(), clusters.end());
168 for (
unsigned int i = 0; i < clusters.size(); ++i) {
// The entry range is printed half-open: [first .. first + nEntries).
169 output <<
" # " << std::setw(5) << i
170 <<
" Entry range: [" << clusters[i].fFirstEntry <<
".."
171 << clusters[i].fFirstEntry + clusters[i].fNEntries <<
") -- " << clusters[i].fNEntries << std::endl;
173 <<
" # Pages: " << clusters[i].fNPages << std::endl;
175 <<
" Size on storage: " << clusters[i].fBytesOnStorage <<
" B" << std::endl;
177 <<
" Compression: " << std::fixed << std::setprecision(2)
178 << float(clusters[i].fBytesInMemory) / float(
float(clusters[i].fBytesOnStorage)) << std::endl;
// ---- Per-column details ----
181 output <<
"------------------------------------------------------------" << std::endl;
182 output <<
"COLUMN DETAILS" << std::endl;
183 output <<
"------------------------------------------------------------" << std::endl;
// Resolve the dotted field path of every column; substr(1) drops its first
// character.
// NOTE(review): presumably that character is the '.' left by an unnamed root
// field -- confirm.
184 for (
auto &col : columns)
185 col.fFieldName = GetFieldName(col.fFieldId, *
this).substr(1);
// Columns are printed by field name, then by local order (ColumnInfo::operator<).
186 std::sort(columns.begin(), columns.end());
187 for (
const auto &col : columns) {
// Averages fall back to 0 for a column without pages to avoid dividing by zero.
188 auto avgPageSize = (col.fNPages == 0) ? 0 : (col.fBytesOnStorage / col.fNPages);
189 auto avgElementsPerPage = (col.fNPages == 0) ? 0 : (col.fNElements / col.fNPages);
190 output <<
" " << col.fFieldName <<
" [#" << col.fLocalOrder <<
"]" <<
" -- "
191 << GetColumnTypeName(col.fType) << std::endl;
192 output <<
" # Elements: " << col.fNElements << std::endl;
193 output <<
" # Pages: " << col.fNPages << std::endl;
194 output <<
" Avg elements / page: " << avgElementsPerPage << std::endl;
195 output <<
" Avg page size: " << avgPageSize <<
" B" << std::endl;
196 output <<
" Size on storage: " << col.fBytesOnStorage <<
" B" << std::endl;
// NOTE(review): divides by col.fBytesOnStorage without a zero check.
197 output <<
" Compression: " << std::fixed << std::setprecision(2)
198 << float(col.fElementSize * col.fNElements) / float(col.fBytesOnStorage) << std::endl;
199 output <<
"............................................................" << std::endl;