35 unsigned int TTreeProcessorMT::fgMaxTasksPerFilePerWorker = 24U;
47 void TTreeView::MakeChain(
const std::string &treeName,
const std::vector<std::string> &fileNames,
48 const FriendInfo &friendInfo,
const std::vector<Long64_t> &nEntries,
49 const std::vector<std::vector<Long64_t>> &friendEntries)
51 const std::vector<NameAlias> &friendNames = friendInfo.fFriendNames;
52 const std::vector<std::vector<std::string>> &friendFileNames = friendInfo.fFriendFileNames;
54 fChain.reset(
new TChain(treeName.c_str()));
55 const auto nFiles = fileNames.size();
56 for (
auto i = 0u; i < nFiles; ++i) {
57 fChain->Add(fileNames[i].c_str(), nEntries[i]);
59 fChain->ResetBit(TObject::kMustCleanup);
62 const auto nFriends = friendNames.size();
63 for (
auto i = 0u; i < nFriends; ++i) {
64 const auto &friendName = friendNames[i];
65 const auto &name = friendName.first;
66 const auto &alias = friendName.second;
69 auto frChain = std::make_unique<TChain>(name.c_str());
70 const auto nFileNames = friendFileNames[i].size();
71 for (
auto j = 0u; j < nFileNames; ++j)
72 frChain->Add(friendFileNames[i][j].c_str(), friendEntries[i][j]);
75 fChain->AddFriend(frChain.get(), alias.c_str());
76 fFriends.emplace_back(std::move(frChain));
80 TTreeView::TreeReaderEntryListPair
81 TTreeView::MakeReaderWithEntryList(TEntryList &globalList, Long64_t start, Long64_t end)
86 std::vector<TEntryList*> globalEntryLists;
87 auto innerLists = globalList.GetLists();
89 if (globalList.GetN()) {
90 globalEntryLists.emplace_back(&globalList);
93 for (
auto lp : *innerLists) {
94 auto lpAsTEntryList =
static_cast<TEntryList *
>(lp);
95 if (lpAsTEntryList->GetN()) {
96 globalEntryLists.emplace_back(lpAsTEntryList);
101 auto localList = std::make_unique<TEntryList>();
103 for (
auto gl : globalEntryLists) {
104 Long64_t entry = gl->GetEntry(0);
107 auto tmp_list =
new TEntryList(gl->GetName(), gl->GetTitle(), gl->GetFileName(), gl->GetTreeName());
112 }
else if (entry >= start) {
113 tmp_list->Enter(entry);
115 }
while ((entry = gl->Next()) >= 0);
117 if (tmp_list->GetN() > 0) {
118 localList->Add(tmp_list);
124 auto reader = std::make_unique<TTreeReader>(fChain.get(), localList.get());
125 return std::make_pair(std::move(reader), std::move(localList));
128 std::unique_ptr<TTreeReader> TTreeView::MakeReader(Long64_t start, Long64_t end)
130 auto reader = std::make_unique<TTreeReader>(fChain.get());
131 reader->SetEntriesRange(start, end);
137 TTreeView::TreeReaderEntryListPair
138 TTreeView::GetTreeReader(Long64_t start, Long64_t end,
const std::string &treeName,
139 const std::vector<std::string> &fileNames,
const FriendInfo &friendInfo, TEntryList entryList,
140 const std::vector<Long64_t> &nEntries,
const std::vector<std::vector<Long64_t>> &friendEntries)
142 const bool usingLocalEntries = friendInfo.fFriendNames.empty() && entryList.GetN() == 0;
143 if (fChain ==
nullptr || (usingLocalEntries && fileNames[0] != fChain->GetListOfFiles()->At(0)->GetTitle()))
144 MakeChain(treeName, fileNames, friendInfo, nEntries, friendEntries);
146 std::unique_ptr<TTreeReader> reader;
147 std::unique_ptr<TEntryList> localList;
148 if (entryList.GetN() > 0) {
149 std::tie(reader, localList) = MakeReaderWithEntryList(entryList, start, end);
151 reader = MakeReader(start, end);
155 return std::make_pair(std::move(reader), std::move(localList));
161 using ClustersAndEntries = std::pair<std::vector<std::vector<EntryCluster>>, std::vector<Long64_t>>;
162 static ClustersAndEntries MakeClusters(
const std::string &treeName,
const std::vector<std::string> &fileNames)
166 TDirectory::TContext c;
167 const auto nFileNames = fileNames.size();
168 std::vector<std::vector<EntryCluster>> clustersPerFile;
169 std::vector<Long64_t> entriesPerFile;
170 entriesPerFile.reserve(nFileNames);
171 Long64_t offset = 0ll;
172 for (
const auto &fileName : fileNames) {
173 auto fileNameC = fileName.c_str();
174 std::unique_ptr<TFile> f(TFile::Open(fileNameC));
175 if (!f || f->IsZombie()) {
176 Error(
"TTreeProcessorMT::Process",
"An error occurred while opening file %s: skipping it.", fileNameC);
177 clustersPerFile.emplace_back(std::vector<EntryCluster>());
178 entriesPerFile.emplace_back(0ULL);
182 f->GetObject(treeName.c_str(), t);
185 Error(
"TTreeProcessorMT::Process",
"An error occurred while getting tree %s from file %s: skipping this file.",
186 treeName.c_str(), fileNameC);
187 clustersPerFile.emplace_back(std::vector<EntryCluster>());
188 entriesPerFile.emplace_back(0ULL);
192 auto clusterIter = t->GetClusterIterator(0);
193 Long64_t start = 0ll, end = 0ll;
194 const Long64_t entries = t->GetEntries();
196 std::vector<EntryCluster> clusters;
197 while ((start = clusterIter()) < entries) {
198 end = clusterIter.GetNextEntry();
200 clusters.emplace_back(EntryCluster{start + offset, end + offset});
203 clustersPerFile.emplace_back(std::move(clusters));
204 entriesPerFile.emplace_back(entries);
218 const auto maxTasksPerFile = TTreeProcessorMT::GetMaxTasksPerFilePerWorker() * ROOT::GetImplicitMTPoolSize();
219 std::vector<std::vector<EntryCluster>> eventRangesPerFile(clustersPerFile.size());
220 auto clustersPerFileIt = clustersPerFile.begin();
221 auto eventRangesPerFileIt = eventRangesPerFile.begin();
222 for (; clustersPerFileIt != clustersPerFile.end(); clustersPerFileIt++, eventRangesPerFileIt++) {
223 const auto clustersInThisFileSize = clustersPerFileIt->size();
224 const auto nFolds = clustersInThisFileSize / maxTasksPerFile;
229 clustersPerFileIt->begin(), clustersPerFileIt->end(),
230 [&eventRangesPerFileIt](
const EntryCluster &clust) { eventRangesPerFileIt->emplace_back(clust); });
235 auto nReminderClusters = clustersInThisFileSize % maxTasksPerFile;
236 const auto clustersInThisFile = *clustersPerFileIt;
237 for (
auto i = 0ULL; i < clustersInThisFileSize; ++i) {
238 const auto start = clustersInThisFile[i].start;
243 if (nReminderClusters > 0) {
247 const auto end = clustersInThisFile[i].end;
248 eventRangesPerFileIt->emplace_back(EntryCluster({start, end}));
252 return std::make_pair(std::move(eventRangesPerFile), std::move(entriesPerFile));
257 static std::vector<std::vector<Long64_t>>
258 GetFriendEntries(
const std::vector<std::pair<std::string, std::string>> &friendNames,
259 const std::vector<std::vector<std::string>> &friendFileNames)
261 std::vector<std::vector<Long64_t>> friendEntries;
262 const auto nFriends = friendNames.size();
263 for (
auto i = 0u; i < nFriends; ++i) {
264 std::vector<Long64_t> nEntries;
265 const auto &thisFriendName = friendNames[i].first;
266 const auto &thisFriendFiles = friendFileNames[i];
267 for (
const auto &fname : thisFriendFiles) {
268 std::unique_ptr<TFile> f(TFile::Open(fname.c_str()));
270 f->GetObject(thisFriendName.c_str(), t);
271 nEntries.emplace_back(t->GetEntries());
273 friendEntries.emplace_back(std::move(nEntries));
276 return friendEntries;
281 static std::string GetTreeFullPath(
const TTree &tree)
284 if (0 == strcmp(
"TChain", tree.ClassName())) {
285 auto &chain =
dynamic_cast<const TChain &
>(tree);
286 auto files = chain.GetListOfFiles();
287 if (files && 0 != files->GetEntries()) {
288 return files->At(0)->GetName();
293 if (
auto motherDir = tree.GetDirectory()) {
299 if (motherDir->InheritsFrom(
"TFile")) {
300 return tree.GetName();
302 std::string fullPath(motherDir->GetPath());
304 fullPath += tree.GetName();
309 return tree.GetName();
320 Internal::FriendInfo TTreeProcessorMT::GetFriendInfo(TTree &tree)
322 std::vector<Internal::NameAlias> friendNames;
323 std::vector<std::vector<std::string>> friendFileNames;
325 const auto friends = tree.GetListOfFriends();
327 return Internal::FriendInfo();
329 for (
auto fr : *friends) {
330 const auto frTree =
static_cast<TFriendElement *
>(fr)->GetTree();
333 const auto realName = frTree->GetName();
334 const auto alias = tree.GetFriendAlias(frTree);
336 friendNames.emplace_back(std::make_pair(realName, std::string(alias)));
338 friendNames.emplace_back(std::make_pair(realName,
""));
342 friendFileNames.emplace_back();
343 auto &fileNames = friendFileNames.back();
344 const bool isChain = tree.IsA() == TChain::Class();
346 const auto frChain =
static_cast<TChain *
>(frTree);
347 for (
auto f : *(frChain->GetListOfFiles())) {
348 fileNames.emplace_back(f->GetTitle());
351 const auto f = frTree->GetCurrentFile();
353 throw std::runtime_error(
"Friend trees with no associated file are not supported.");
354 fileNames.emplace_back(f->GetName());
358 return Internal::FriendInfo{std::move(friendNames), std::move(friendFileNames)};
363 std::string TTreeProcessorMT::FindTreeName()
365 std::string treeName;
367 if (fFileNames.empty())
368 throw std::runtime_error(
"Empty list of files and no tree name provided");
370 ::TDirectory::TContext ctxt(gDirectory);
371 std::unique_ptr<TFile> f(TFile::Open(fFileNames[0].c_str()));
372 TIter next(f->GetListOfKeys());
373 while (TKey *key = (TKey *)next()) {
374 const char *className = key->GetClassName();
375 if (strcmp(className,
"TTree") == 0) {
376 treeName = key->GetName();
380 if (treeName.empty())
381 throw std::runtime_error(
"Cannot find any tree in file " + fFileNames[0]);
392 TTreeProcessorMT::TTreeProcessorMT(std::string_view filename, std::string_view treename)
393 : fFileNames({std::string(filename)}), fTreeName(treename.empty() ? FindTreeName() : treename), fFriendInfo()
/// \brief Convert a non-empty list of string views into a list of owning strings.
/// \param[in] views The views to copy.
/// \return One std::string per element of `views`, in the same order.
/// \throws std::runtime_error if `views` is empty.
std::vector<std::string> CheckAndConvert(const std::vector<std::string_view> &views)
{
   if (views.empty())
      throw std::runtime_error("The provided list of file names is empty");

   std::vector<std::string> strings;
   strings.reserve(views.size());
   for (const auto &v : views)
      strings.emplace_back(v);
   return strings;
}
415 TTreeProcessorMT::TTreeProcessorMT(
const std::vector<std::string_view> &filenames, std::string_view treename)
416 : fFileNames(CheckAndConvert(filenames)), fTreeName(treename.empty() ? FindTreeName() : treename), fFriendInfo()
420 std::vector<std::string> GetFilesFromTree(TTree &tree)
422 std::vector<std::string> filenames;
424 const bool isChain = tree.IsA() == TChain::Class();
426 TObjArray *filelist =
static_cast<TChain &
>(tree).GetListOfFiles();
427 const auto nFiles = filelist->GetEntries();
429 throw std::runtime_error(
"The provided chain of files is empty");
430 filenames.reserve(nFiles);
431 for (
auto f : *filelist)
432 filenames.emplace_back(f->GetTitle());
434 TFile *f = tree.GetCurrentFile();
436 const auto msg =
"The specified TTree is not linked to any file, in-memory-only trees are not supported.";
437 throw std::runtime_error(msg);
440 filenames.emplace_back(f->GetName());
450 TTreeProcessorMT::TTreeProcessorMT(TTree &tree,
const TEntryList &entries)
451 : fFileNames(GetFilesFromTree(tree)), fTreeName(ROOT::Internal::GetTreeFullPath(tree)), fEntryList(entries),
452 fFriendInfo(GetFriendInfo(tree))
459 TTreeProcessorMT::TTreeProcessorMT(TTree &tree) : TTreeProcessorMT(tree, TEntryList()) {}
478 void TTreeProcessorMT::Process(std::function<
void(TTreeReader &)> func)
480 const std::vector<Internal::NameAlias> &friendNames = fFriendInfo.fFriendNames;
481 const std::vector<std::vector<std::string>> &friendFileNames = fFriendInfo.fFriendFileNames;
485 const bool hasFriends = !friendNames.empty();
486 const bool hasEntryList = fEntryList.GetN() > 0;
487 const bool shouldRetrieveAllClusters = hasFriends || hasEntryList;
488 const auto clustersAndEntries =
489 shouldRetrieveAllClusters ? Internal::MakeClusters(fTreeName, fFileNames) : Internal::ClustersAndEntries{};
490 const auto &clusters = clustersAndEntries.first;
491 const auto &entries = clustersAndEntries.second;
494 const auto friendEntries =
495 hasFriends ? Internal::GetFriendEntries(friendNames, friendFileNames) : std::vector<std::vector<Long64_t>>{};
497 TThreadExecutor pool;
499 using Internal::EntryCluster;
500 auto processFile = [&](std::size_t fileIdx) {
502 const auto &theseFiles = shouldRetrieveAllClusters ? fFileNames : std::vector<std::string>({fFileNames[fileIdx]});
504 const auto theseClustersAndEntries =
505 shouldRetrieveAllClusters ? Internal::ClustersAndEntries{} : Internal::MakeClusters(fTreeName, theseFiles);
508 const auto &thisFileClusters = shouldRetrieveAllClusters ? clusters[fileIdx] : theseClustersAndEntries.first[0];
511 const auto &theseEntries =
512 shouldRetrieveAllClusters ? entries : std::vector<Long64_t>({theseClustersAndEntries.second[0]});
514 auto processCluster = [&](
const Internal::EntryCluster &c) {
515 std::unique_ptr<TTreeReader> reader;
516 std::unique_ptr<TEntryList> elist;
517 std::tie(reader, elist) = fTreeView->GetTreeReader(c.start, c.end, fTreeName, theseFiles, fFriendInfo,
518 fEntryList, theseEntries, friendEntries);
522 pool.Foreach(processCluster, thisFileClusters);
525 std::vector<std::size_t> fileIdxs(fFileNames.size());
526 std::iota(fileIdxs.begin(), fileIdxs.end(), 0u);
529 Internal::TParTreeProcessingRAII ptpRAII;
531 pool.Foreach(processFile, fileIdxs);
537 unsigned int TTreeProcessorMT::GetMaxTasksPerFilePerWorker()
539 return fgMaxTasksPerFilePerWorker;
549 void TTreeProcessorMT::SetMaxTasksPerFilePerWorker(
unsigned int maxTasksPerFile)
551 fgMaxTasksPerFilePerWorker = maxTasksPerFile;