36 ClassImp(TFileCollection);
43 TFileCollection::TFileCollection(
const char *name,
const char *title,
44 const char *textfile, Int_t nfiles, Int_t firstfile)
45 : TNamed(name, title), fList(0), fMetaDataList(0), fDefaultTree(),
46 fTotalSize(0), fNFiles(0), fNStagedFiles(0), fNCorruptFiles(0)
48 fList =
new THashList();
51 fMetaDataList =
new TList;
52 fMetaDataList->SetOwner();
54 AddFromFile(textfile, nfiles, firstfile);
60 TFileCollection::~TFileCollection()
69 Int_t TFileCollection::Add(TFileInfo *info)
72 if (!fList->FindObject(info->GetName())) {
74 if (info->GetIndex() < 0) info->SetIndex(fList->GetSize());
78 Warning(
"Add",
"file: '%s' already in the list - ignoring",
79 info->GetCurrentUrl()->GetUrl());
88 Int_t TFileCollection::Add(TFileCollection *coll)
90 if (fList && coll && coll->GetList()) {
91 TIter nxfi(coll->GetList());
93 while ((fi = (TFileInfo *) nxfi())) {
94 TFileInfo *info =
new TFileInfo(*fi);
96 if (fi->GetIndex() < 0) info->SetIndex(fList->GetSize());
112 Int_t TFileCollection::AddFromFile(
const char *textfile, Int_t nfiles, Int_t firstfile)
118 TString fn(textfile);
119 if (!fn.IsNull() && !gSystem->ExpandPathName(fn)) {
123 Bool_t all = (nfiles <= 0) ? kTRUE : kFALSE;
124 Int_t ff = (!all && (firstfile < 1)) ? 1 : firstfile;
126 while (f.good() && (all || nf < nfiles)) {
130 if (!line.IsWhitespace() && !line.BeginsWith(
"#")) {
132 if (all || nn >= ff) {
133 TFileInfo *info =
new TFileInfo(line);
135 if (info->GetIndex() < 0) info->SetIndex(fList->GetSize());
143 Error(
"AddFromFile",
"unable to open file %s (%s)", textfile, fn.Data());
155 Int_t TFileCollection::Add(
const char *dir)
163 Error(
"Add",
"input dir undefined");
169 TString baseDir = gSystem->DirName(dir);
171 if (gSystem->GetPathInfo(dir, st) == 0 ||
172 gSystem->GetPathInfo(baseDir, tmp) == 0) {
174 if (R_ISREG(st.fMode)) {
176 TFileInfo *info =
new TFileInfo(dir);
177 info->SetBit(TFileInfo::kStaged);
183 void *dataSetDir = gSystem->OpenDirectory(gSystem->DirName(dir));
186 Error(
"Add",
"directory %s cannot be opened",
187 gSystem->DirName(dir));
190 TString filesExp(TString(
"^") + gSystem->BaseName(dir) +
"$");
191 filesExp.ReplaceAll(
"*",
".*");
192 TRegexp rg(filesExp);
193 while ((ent = gSystem->GetDirEntry(dataSetDir))) {
194 TString entryString(ent);
195 if (entryString.Index(rg) != kNPOS) {
197 TString fn = gSystem->DirName(dir);
200 gSystem->GetPathInfo(fn, st);
201 if (R_ISREG(st.fMode)) {
203 TFileInfo *info =
new TFileInfo(fn);
204 info->SetBit(TFileInfo::kStaged);
211 gSystem->FreeDirectory(dataSetDir);
223 Int_t TFileCollection::RemoveDuplicates()
225 THashList *hl =
new THashList;
228 Int_t n0 = fList->GetSize();
231 while ((fi = (TFileInfo *)nxfi())) {
232 if (!(hl->FindObject(fi->GetUUID()->AsString()))) {
235 fi->SetName(fi->GetUUID()->AsString());
242 Int_t nr = n0 - fList->GetSize();
244 Info(
"RemoveDuplicates",
"%d duplicates found and removed", nr);
252 TFileCollection *TFileCollection::GetStagedSubset()
257 TFileCollection *subset =
new TFileCollection(GetName(), GetTitle());
260 TFileInfo *fileInfo = 0;
261 while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next()))) {
262 if (fileInfo->TestBit(TFileInfo::kStaged) && !fileInfo->TestBit(TFileInfo::kCorrupted))
263 subset->Add(fileInfo);
276 Long64_t TFileCollection::Merge(TCollection *li)
280 if (li->IsEmpty())
return 0;
284 while (TObject *o = next()) {
285 TFileCollection* coll =
dynamic_cast<TFileCollection*
> (o);
287 Error(
"Add",
"attempt to add object of class: %s to a %s",
288 o->ClassName(),this->ClassName());
308 Int_t TFileCollection::Update(Long64_t avgsize)
321 TIter nxm(fMetaDataList);
322 TFileInfoMeta *m = 0;
323 while ((m = (TFileInfoMeta *)nxm())) {
324 if (!(m->TestBit(TFileInfoMeta::kExternal))) {
325 fMetaDataList->Remove(m);
330 fNFiles = fList->GetEntries();
333 TFileInfo *fileInfo = 0;
334 while ((fileInfo = dynamic_cast<TFileInfo*> (iter.Next()))) {
336 if (fileInfo->GetSize() > 0) {
337 fTotalSize += fileInfo->GetSize();
342 fTotalSize += avgsize;
346 if (fileInfo->TestBit(TFileInfo::kStaged) && !fileInfo->TestBit(TFileInfo::kCorrupted)) {
349 if (fileInfo->GetMetaDataList()) {
350 TIter metaDataIter(fileInfo->GetMetaDataList());
353 while ((obj = metaDataIter.Next())) {
354 TFileInfoMeta *metaData =
dynamic_cast<TFileInfoMeta*
>(obj);
357 if (!metaData->IsTree())
361 TFileInfoMeta *metaDataSum =
dynamic_cast<TFileInfoMeta*
>(fMetaDataList->FindObject(metaData->GetName()));
362 Bool_t newObj = kFALSE;
365 metaDataSum =
new TFileInfoMeta(metaData->GetName(), metaData->GetTitle());
366 fMetaDataList->Add(metaDataSum);
372 metaDataSum->SetEntries(metaData->GetEntries());
374 metaDataSum->SetEntries(metaDataSum->GetEntries() + metaData->GetEntries());
378 if (fileInfo->TestBit(TFileInfo::kCorrupted))
400 void TFileCollection::Print(Option_t *option)
const
403 TPMERegexp re(
"(^|;| )filter:([SsCc]+)( |;|$)", 4);
404 if (re.Match(option) == 4) {
405 TString showOnly = re[2];
406 PrintDetailed(showOnly);
410 Printf(
"TFileCollection %s - %s contains: %lld files with a size of"
411 " %lld bytes, %.1f %% staged - default tree name: '%s'",
412 GetName(), GetTitle(), fNFiles, fTotalSize, GetStagedPercentage(),
413 GetDefaultTreeName());
415 if (opt.Contains(
"M", TString::kIgnoreCase)) {
416 Printf(
"The files contain the following trees:");
418 TIter metaDataIter(fMetaDataList);
419 TFileInfoMeta* metaData = 0;
420 while ((metaData = dynamic_cast<TFileInfoMeta*>(metaDataIter.Next()))) {
421 if (!metaData->IsTree())
424 Printf(
"Tree %s: %lld events", metaData->GetName(), metaData->GetEntries());
428 if (fList && opt.Contains(
"F", TString::kIgnoreCase)) {
429 Printf(
"The collection contains the following files:");
430 if (!opt.Contains(
"L") && !fDefaultTree.IsNull())
431 opt += TString::Format(
" T:%s", fDefaultTree.Data());
439 void TFileCollection::PrintDetailed(TString &showOnly)
const
441 Bool_t bS, bs, bC, bc;
442 bS = bs = bC = bc = kFALSE;
444 if (showOnly.Index(
'S') >= 0) bS = kTRUE;
445 if (showOnly.Index(
's') >= 0) bs = kTRUE;
446 if (showOnly.Index(
'C') >= 0) bC = kTRUE;
447 if (showOnly.Index(
'c') >= 0) bc = kTRUE;
450 if (!bc && !bC) bc = bC = kTRUE;
451 if (!bs && !bS) bs = bS = kTRUE;
456 UInt_t countMatch = 0;
458 Printf(
"\033[1m #. SC | Entries | Size | URL\033[m");
463 while ((info = dynamic_cast<TFileInfo *>(it.Next()))) {
465 Bool_t s = info->TestBit(TFileInfo::kStaged);
466 Bool_t c = info->TestBit(TFileInfo::kCorrupted);
472 if ( ((s && bS) || (!s && bs)) && ((c && bC) || (!c && bc)) ) {
474 TFileInfoMeta *meta = info->GetMetaData();
477 if (meta) entries = meta->GetEntries();
479 FormatSize(info->GetSize(), um, sz);
483 TUrl *curUrl = info->GetCurrentUrl();
484 const char *curUrlStr = curUrl ? curUrl->GetUrl() :
"n.a.";
485 Printf(
"\033[1m%4u.\033[m %c%c | %-7s | %6.1lf %s | %s",
487 (s ?
'S' :
's'), (c ?
'C' :
'c'),
488 ((entries > 0) ? Form(
"% 7d", entries) :
"n.a."),
489 sz, um.Data(), curUrlStr);
493 while ((url = info->NextUrl())) {
494 Printf(
" | | | %s", url->GetUrl());
504 Printf(
">> There are \033[1m%u\033[m file(s) in dataset: "
505 "\033[1m%u (%5.1f%%)\033[m matched your criteria (%s)",
506 countAll, countMatch,
507 100.*(Float_t)countMatch/(Float_t)countAll, showOnly.Data());
509 FormatSize(fTotalSize, um, sz);
510 Printf(
">> Total size : \033[1m%.1f %s\033[m", sz, um.Data());
511 Printf(
">> Staged (S) : \033[1m%5.1f %%\033[m", GetStagedPercentage());
512 Printf(
">> Corrupted (C) : \033[1m%5.1f %%\033[m",
513 GetCorruptedPercentage());
517 Printf(
">> No files in dataset");
520 const char *treeName = GetDefaultTreeName();
521 Printf(
">> Default tree : \033[1m%s\033[m",
522 (treeName ? treeName :
"(no default tree)"));
529 void TFileCollection::FormatSize(Long64_t bytes, TString &um,
530 Double_t &size)
const
532 static const char *ums[] = {
"byt",
"KiB",
"MiB",
"GiB",
"TiB" };
533 Int_t maxDiv =
sizeof(ums)/
sizeof(
const char *);
537 while ((b >= 1024.) && (nDiv+1 < maxDiv)) {
549 void TFileCollection::SetAnchor(
const char *anchor)
555 TFileInfo *fileInfo = 0;
556 while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next()))) {
557 fileInfo->ResetUrl();
559 while ((url = fileInfo->NextUrl()))
560 url->SetAnchor(anchor);
561 fileInfo->ResetUrl();
568 void TFileCollection::SetBitAll(UInt_t f)
574 TFileInfo *fileInfo = 0;
575 while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
582 void TFileCollection::ResetBitAll(UInt_t f)
588 TFileInfo *fileInfo = 0;
589 while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
590 fileInfo->ResetBit(f);
598 const char *TFileCollection::GetDefaultTreeName()
const
600 if (fDefaultTree.Length() > 0)
603 TIter metaDataIter(fMetaDataList);
604 TFileInfoMeta *metaData = 0;
605 while ((metaData = dynamic_cast<TFileInfoMeta*>(metaDataIter.Next()))) {
606 if (!metaData->IsTree())
608 return metaData->GetName();
618 Long64_t TFileCollection::GetTotalEntries(
const char *tree)
const
620 if (!tree || !*tree) {
621 tree = GetDefaultTreeName();
626 TFileInfoMeta *metaData =
dynamic_cast<TFileInfoMeta*
>(fMetaDataList->FindObject(tree));
630 return metaData->GetEntries();
637 TFileInfoMeta *TFileCollection::GetMetaData(
const char *meta)
const
642 return dynamic_cast<TFileInfoMeta*
>(fMetaDataList->FindObject(meta));
649 void TFileCollection::SetDefaultMetaData(
const char *meta)
651 TFileInfoMeta *fim = GetMetaData(meta);
653 fMetaDataList->Remove(fim);
654 fMetaDataList->AddFirst(fim);
662 void TFileCollection::RemoveMetaData(
const char *meta)
666 TFileInfo *fileInfo = 0;
667 while ((fileInfo = dynamic_cast<TFileInfo*>(iter.Next())))
668 fileInfo->RemoveMetaData(meta);
672 TObject* obj = fMetaDataList->FindObject(
"meta");
674 fMetaDataList->Remove(obj);
678 fMetaDataList->Clear();
684 void TFileCollection::Sort(Bool_t useindex)
691 SetBitAll(TFileInfo::kSortWithIndex);
693 ResetBitAll(TFileInfo::kSortWithIndex);
704 TObjString *TFileCollection::ExportInfo(
const char *name, Int_t popt)
707 if (GetDefaultTreeName()) {
708 TFileInfoMeta* meta = GetMetaData(GetDefaultTreeName());
710 treeInfo = GetDefaultTreeName();
712 treeInfo += TString::Format(
", %lld entries", meta->GetEntries());
713 TFileInfoMeta *frac = GetMetaData(
"/FractionOfTotal");
715 treeInfo += TString::Format(
", %3.1f %% of total", frac->GetEntries() / 10.);
717 treeInfo.Form(
" %s ", GetDefaultTreeName());
718 if (treeInfo.Length() > 14) treeInfo.Replace(13, 1,
'>');
721 if (meta->GetEntries() > 99999999) {
722 treeInfo += TString::Format(
"| %8lld ", meta->GetEntries());
724 treeInfo += TString::Format(
"| %8.4g ", (Double_t) meta->GetEntries());
731 if (popt == 0) treeInfo.Resize(25);
734 const char *unit[4] = {
"kB",
"MB",
"GB",
"TB"};
736 Long64_t refsz = 1024;
737 Long64_t xsz = (Long64_t) (GetTotalSize() / refsz);
738 while (xsz > 1024 && k < 3) {
741 xsz = (Long64_t) (GetTotalSize() / refsz);
745 TString dsname(name);
746 if (dsname.IsNull()) dsname = GetName();
749 TObjString *outs = 0;
751 outs =
new TObjString(Form(
"%s %lld files, %lld %s, staged %d %%, tree: %s", dsname.Data(),
752 GetNFiles(), xsz, unit[k],
753 (Int_t)GetStagedPercentage(), treeInfo.Data()));
755 outs =
new TObjString(Form(
"%s| %7lld |%s| %5lld %s | %3d %%", dsname.Data(),
756 GetNFiles(), treeInfo.Data(), xsz, unit[k],
757 (Int_t)GetStagedPercentage()));
767 TFileCollection *TFileCollection::GetFilesOnServer(
const char *server)
769 TFileCollection *fc = (TFileCollection *)0;
772 if (!server || strlen(server) <= 0) {
773 Info(
"GetFilesOnServer",
"server undefined - do nothing");
778 if (!fList || fList->GetSize() <= 0) {
779 Info(
"GetFilesOnServer",
"the list is empty - do nothing");
785 TString srv, scheme(
"root"), port;
786 if (uri.GetScheme() !=
"") scheme = uri.GetScheme();
787 if (uri.GetPort() !=
"") port.Form(
":%s", uri.GetPort().Data());
788 srv.Form(
"%s://%s%s", scheme.Data(), TUrl(server).GetHostFQDN(), port.Data());
790 Info(
"GetFilesOnServer",
"searching for files on server: '%s' (input: '%s')",
794 fc =
new TFileCollection(GetName());
796 if (GetTitle() && strlen(GetTitle()) > 0) {
797 title.Form(
"%s (subset on server %s)", GetTitle(), srv.Data());
799 title.Form(
"subset of '%s' on server %s", GetName(), srv.Data());
801 fc->SetTitle(title.Data());
803 fc->SetDefaultTreeName(GetDefaultTreeName());
811 while ((fi = (TFileInfo *)nxf())) {
813 if ((xu = fi->FindByUrl(srv.Data()))) {
815 TFileInfo *nfi =
new TFileInfo(xu->GetUrl(), fi->GetSize(),
816 fi->GetUUID() ? fi->GetUUID()->AsString() : 0,
817 fi->GetMD5() ? fi->GetMD5()->AsString() : 0);
818 if (fi->GetMetaDataList()) {
819 TIter nxm(fi->GetMetaDataList());
820 TFileInfoMeta *md = 0;
821 while ((md = (TFileInfoMeta *) nxm())) {
822 nfi->AddMetaData(
new TFileInfoMeta(*md));
825 if (fi->TestBit(TFileInfo::kStaged)) nfi->SetBit(TFileInfo::kStaged);
826 if (fi->TestBit(TFileInfo::kCorrupted)) nfi->SetBit(TFileInfo::kCorrupted);
828 Info(
"GetFilesOnServer",
"adding: %s", xu->GetUrl());
834 if (fc->GetList()->GetSize() <= 0) {
837 Info(
"GetFilesOnServer",
"dataset '%s' has no files on server: '%s' (searched for: '%s')",
838 GetName(), server, srv.Data());
845 Long64_t xf = (fc->GetTotalSize() * 1000) / GetTotalSize();
846 TFileInfoMeta *m =
new TFileInfoMeta(
"FractionOfTotal",
"External Info", xf);
847 m->SetBit(TFileInfoMeta::kExternal);
861 TMap *TFileCollection::GetFilesPerServer(
const char *exclude, Bool_t curronly)
866 if (!fList || fList->GetSize() <= 0) {
867 Info(
"GetFilesPerServer",
"the list is empty - do nothing");
873 if (exclude && strlen(exclude) > 0) {
874 excl =
new THashList;
877 TString srvs(exclude), s, srv, scheme, port;
879 while (srvs.Tokenize(s, from,
",")) {
880 uri.SetUri(s.Data());
883 if (uri.GetScheme() !=
"") scheme = uri.GetScheme();
884 if (uri.GetPort() !=
"") port.Form(
":%s", uri.GetPort().Data());
885 srv.Form(
"%s://%s%s", scheme.Data(), TUrl(s.Data()).GetHostFQDN(), port.Data());
887 excl->Add(
new TObjString(srv.Data()));
899 TFileCollection *fc = 0;
900 while ((fi = (TFileInfo *)nxf())) {
902 TUrl *curl = fi->GetCurrentUrl();
904 if (!curronly) fi->ResetUrl();
906 while ((xurl = (curronly) ? curl : fi->NextUrl())) {
908 key.Form(
"%s://%s", xurl->GetProtocol(), xurl->GetHostFQDN());
910 if (excl && excl->FindObject(key.Data())) {
913 }
else if (excl && xurl->GetPort() > 0) {
915 key += TString::Format(
":%d", xurl->GetPort());
916 if (excl->FindObject(key.Data())) {
923 if (!(ent = (TPair *) dsmap->FindObject(key.Data()))) {
925 fc =
new TFileCollection(GetName());
927 if (GetTitle() && strlen(GetTitle()) > 0) {
928 title.Form(
"%s (subset on server %s)", GetTitle(), key.Data());
930 title.Form(
"subset of '%s' on server %s", GetName(), key.Data());
932 fc->SetTitle(title.Data());
934 fc->SetDefaultTreeName(GetDefaultTreeName());
936 dsmap->Add(
new TObjString(key.Data()), fc);
939 Info(
"GetFilesPerServer",
"found server: '%s' (fc: %p)", key.Data(), fc);
942 fc = (TFileCollection *) ent->Value();
945 TFileInfo *nfi =
new TFileInfo(xurl->GetUrl(kTRUE), fi->GetSize(),
946 fi->GetUUID() ? fi->GetUUID()->AsString() : 0,
947 fi->GetMD5() ? fi->GetMD5()->AsString() : 0);
948 if (fi->GetMetaDataList()) {
949 TIter nxm(fi->GetMetaDataList());
950 TFileInfoMeta *md = 0;
951 while ((md = (TFileInfoMeta *) nxm())) {
952 nfi->AddMetaData(
new TFileInfoMeta(*md));
955 if (fi->TestBit(TFileInfo::kStaged)) nfi->SetBit(TFileInfo::kStaged);
956 if (fi->TestBit(TFileInfo::kCorrupted)) nfi->SetBit(TFileInfo::kCorrupted);
962 fi->SetCurrentUrl(curl);
968 while ((k = nxk()) && (fc = (TFileCollection *) dsmap->GetValue(k))) {
971 Long64_t xf = (fc->GetTotalSize() * 1000) / GetTotalSize();
972 TFileInfoMeta *m =
new TFileInfoMeta(
"FractionOfTotal",
"External Info", xf);
973 m->SetBit(TFileInfoMeta::kExternal);
978 if (excl)
delete excl;
994 Bool_t TFileCollection::AddMetaData(TObject *meta)
997 if (!fMetaDataList) {
998 fMetaDataList =
new TList;
999 fMetaDataList->SetOwner();
1001 fMetaDataList->Add(meta);