Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
TProof.cxx
Go to the documentation of this file.
1 // @(#)root/proof:$Id: a2a50e759072c37ccbc65ecbcce735a76de86e95 $
2 // Author: Fons Rademakers 13/02/97
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 /**
12  \defgroup proof PROOF
13 
14  Classes defining the Parallel ROOT Facility, PROOF, a framework for parallel analysis of ROOT TTrees.
15 
16 */
17 
18 /**
19  \defgroup proofkernel PROOF kernel Libraries
20  \ingroup proof
21 
22  The PROOF kernel libraries (libProof, libProofPlayer, libProofDraw) contain the classes defining
23  the kernel of the PROOF facility, i.e. the protocol and the utilities to steer data processing
24  and handling of results.
25 
26 */
27 
28 /** \class TProof
29 \ingroup proofkernel
30 
31 This class controls a Parallel ROOT Facility, PROOF, cluster.
32 It fires the worker servers, it keeps track of how many workers are
33 running, it keeps track of the workers running status, it broadcasts
34 messages to all workers, it collects results, etc.
35 
36 */
37 
38 #include <stdlib.h>
39 #include <fcntl.h>
40 #include <errno.h>
41 #ifdef WIN32
42 # include <io.h>
43 # include <sys/stat.h>
44 # include <sys/types.h>
45 # include "snprintf.h"
46 #else
47 # include <unistd.h>
48 #endif
49 #include <vector>
50 
51 #include "RConfigure.h"
52 #include "Riostream.h"
53 #include "Getline.h"
54 #include "TBrowser.h"
55 #include "TChain.h"
56 #include "TCondor.h"
57 #include "TDSet.h"
58 #include "TError.h"
59 #include "TEnv.h"
60 #include "TEntryList.h"
61 #include "TEventList.h"
62 #include "TFile.h"
63 #include "TFileInfo.h"
64 #include "TFTP.h"
65 #include "THashList.h"
66 #include "TInterpreter.h"
67 #include "TKey.h"
68 #include "TMap.h"
69 #include "TMath.h"
70 #include "TMessage.h"
71 #include "TMonitor.h"
72 #include "TObjArray.h"
73 #include "TObjString.h"
74 #include "TParameter.h"
75 #include "TProof.h"
76 #include "TProofNodeInfo.h"
77 #include "TProofOutputFile.h"
78 #include "TVirtualProofPlayer.h"
79 #include "TVirtualPacketizer.h"
80 #include "TProofServ.h"
81 #include "TPluginManager.h"
82 #include "TQueryResult.h"
83 #include "TRandom.h"
84 #include "TRegexp.h"
85 #include "TROOT.h"
86 #include "TSlave.h"
87 #include "TSocket.h"
88 #include "TSortedList.h"
89 #include "TSystem.h"
90 #include "TTree.h"
91 #include "TUrl.h"
92 #include "TFileCollection.h"
93 #include "TDataSetManager.h"
94 #include "TDataSetManagerFile.h"
95 #include "TMacro.h"
96 #include "TSelector.h"
97 #include "TPRegexp.h"
98 #include "TPackMgr.h"
99 
100 #include <mutex>
101 
102 TProof *gProof = 0;
103 
104 // Rotating indicator
105 char TProofMergePrg::fgCr[4] = {'-', '\\', '|', '/'};
106 
107 TList *TProof::fgProofEnvList = 0; // List of env vars for proofserv
108 TPluginHandler *TProof::fgLogViewer = 0; // Log viewer handler
109 
110 ClassImp(TProof);
111 
112 //----- PROOF Interrupt signal handler -----------------------------------------
113 ////////////////////////////////////////////////////////////////////////////////
114 /// TProof interrupt handler.
115 
116 Bool_t TProofInterruptHandler::Notify()
117 {
118  if (!fProof->IsTty() || fProof->GetRemoteProtocol() < 22) {
119 
120  // Cannot ask the user : abort any remote processing
121  fProof->StopProcess(kTRUE);
122 
123  } else {
124  // Real stop or request to switch to asynchronous?
125  const char *a = 0;
126  if (fProof->GetRemoteProtocol() < 22) {
127  a = Getline("\nSwitch to asynchronous mode not supported remotely:"
128  "\nEnter S/s to stop, Q/q to quit, any other key to continue: ");
129  } else {
130  a = Getline("\nEnter A/a to switch asynchronous, S/s to stop, Q/q to quit,"
131  " any other key to continue: ");
132  }
133  if (a[0] == 'Q' || a[0] == 'S' || a[0] == 'q' || a[0] == 's') {
134 
135  Info("Notify","Processing interrupt signal ... %c", a[0]);
136 
137  // Stop or abort any remote processing
138  Bool_t abort = (a[0] == 'Q' || a[0] == 'q') ? kTRUE : kFALSE;
139  fProof->StopProcess(abort);
140 
141  } else if ((a[0] == 'A' || a[0] == 'a') && fProof->GetRemoteProtocol() >= 22) {
142  // Stop any remote processing
143  fProof->GoAsynchronous();
144  }
145  }
146 
147  return kTRUE;
148 }
149 
150 //----- Input handler for messages from TProofServ -----------------------------
151 ////////////////////////////////////////////////////////////////////////////////
152 /// Constructor
153 
154 TProofInputHandler::TProofInputHandler(TProof *p, TSocket *s)
155  : TFileHandler(s->GetDescriptor(),1),
156  fSocket(s), fProof(p)
157 {
158 }
159 
160 ////////////////////////////////////////////////////////////////////////////////
161 /// Handle input
162 
163 Bool_t TProofInputHandler::Notify()
164 {
165  fProof->CollectInputFrom(fSocket);
166  return kTRUE;
167 }
168 
169 
170 //------------------------------------------------------------------------------
171 
172 ClassImp(TSlaveInfo);
173 
174 ////////////////////////////////////////////////////////////////////////////////
175 /// Used to sort slaveinfos by ordinal.
176 
177 Int_t TSlaveInfo::Compare(const TObject *obj) const
178 {
179  if (!obj) return 1;
180 
181  const TSlaveInfo *si = dynamic_cast<const TSlaveInfo*>(obj);
182 
183  if (!si) return fOrdinal.CompareTo(obj->GetName());
184 
185  const char *myord = GetOrdinal();
186  const char *otherord = si->GetOrdinal();
187  while (myord && otherord) {
188  Int_t myval = atoi(myord);
189  Int_t otherval = atoi(otherord);
190  if (myval < otherval) return 1;
191  if (myval > otherval) return -1;
192  myord = strchr(myord, '.');
193  if (myord) myord++;
194  otherord = strchr(otherord, '.');
195  if (otherord) otherord++;
196  }
197  if (myord) return -1;
198  if (otherord) return 1;
199  return 0;
200 }
201 
202 ////////////////////////////////////////////////////////////////////////////////
203 /// Used to compare slaveinfos by ordinal.
204 
205 Bool_t TSlaveInfo::IsEqual(const TObject* obj) const
206 {
207  if (!obj) return kFALSE;
208  const TSlaveInfo *si = dynamic_cast<const TSlaveInfo*>(obj);
209  if (!si) return kFALSE;
210  return (strcmp(GetOrdinal(), si->GetOrdinal()) == 0);
211 }
212 
213 ////////////////////////////////////////////////////////////////////////////////
214 /// Print slave info. If opt = "active" print only the active
215 /// slaves, if opt="notactive" print only the not active slaves,
216 /// if opt = "bad" print only the bad slaves, else
217 /// print all slaves.
218 
219 void TSlaveInfo::Print(Option_t *opt) const
220 {
221  TString stat = fStatus == kActive ? "active" :
222  fStatus == kBad ? "bad" :
223  "not active";
224 
225  Bool_t newfmt = kFALSE;
226  TString oo(opt);
227  if (oo.Contains("N")) {
228  newfmt = kTRUE;
229  oo.ReplaceAll("N","");
230  }
231  if (oo == "active" && fStatus != kActive) return;
232  if (oo == "notactive" && fStatus != kNotActive) return;
233  if (oo == "bad" && fStatus != kBad) return;
234 
235  if (newfmt) {
236  TString msd, si, datadir;
237  if (!(fMsd.IsNull())) msd.Form("| msd: %s ", fMsd.Data());
238  if (!(fDataDir.IsNull())) datadir.Form("| datadir: %s ", fDataDir.Data());
239  if (fSysInfo.fCpus > 0) {
240  si.Form("| %s, %d cores, %d MB ram", fHostName.Data(),
241  fSysInfo.fCpus, fSysInfo.fPhysRam);
242  } else {
243  si.Form("| %s", fHostName.Data());
244  }
245  Printf("Worker: %9s %s %s%s| %s", fOrdinal.Data(), si.Data(), msd.Data(), datadir.Data(), stat.Data());
246 
247  } else {
248  TString msd = fMsd.IsNull() ? "<null>" : fMsd.Data();
249 
250  std::cout << "Slave: " << fOrdinal
251  << " hostname: " << fHostName
252  << " msd: " << msd
253  << " perf index: " << fPerfIndex
254  << " " << stat
255  << std::endl;
256  }
257 }
258 
259 ////////////////////////////////////////////////////////////////////////////////
260 /// Setter for fSysInfo
261 
262 void TSlaveInfo::SetSysInfo(SysInfo_t si)
263 {
264  fSysInfo.fOS = si.fOS; // OS
265  fSysInfo.fModel = si.fModel; // computer model
266  fSysInfo.fCpuType = si.fCpuType; // type of cpu
267  fSysInfo.fCpus = si.fCpus; // number of cpus
268  fSysInfo.fCpuSpeed = si.fCpuSpeed; // cpu speed in MHz
269  fSysInfo.fBusSpeed = si.fBusSpeed; // bus speed in MHz
270  fSysInfo.fL2Cache = si.fL2Cache; // level 2 cache size in KB
271  fSysInfo.fPhysRam = si.fPhysRam; // Physical RAM
272 }
273 
274 ClassImp(TProof);
275 
276 //------------------------------------------------------------------------------
277 
278 ////////////////////////////////////////////////////////////////////////////////
279 /// Destructor
280 
281 TMergerInfo::~TMergerInfo()
282 {
283  // Just delete the list, the objects are owned by other list
284  if (fWorkers) {
285  fWorkers->SetOwner(kFALSE);
286  SafeDelete(fWorkers);
287  }
288 }
289 ////////////////////////////////////////////////////////////////////////////////
290 /// Increase number of already merged workers by 1
291 
292 void TMergerInfo::SetMergedWorker()
293 {
294  if (AreAllWorkersMerged())
295  Error("SetMergedWorker", "all workers have been already merged before!");
296  else
297  fMergedWorkers++;
298 }
299 
300 ////////////////////////////////////////////////////////////////////////////////
301 /// Add new worker to the list of workers to be merged by this merger
302 
303 void TMergerInfo::AddWorker(TSlave *sl)
304 {
305  if (!fWorkers)
306  fWorkers = new TList();
307  if (fWorkersToMerge == fWorkers->GetSize()) {
308  Error("AddWorker", "all workers have been already assigned to this merger");
309  return;
310  }
311  fWorkers->Add(sl);
312 }
313 
314 ////////////////////////////////////////////////////////////////////////////////
315 /// Return if merger has already merged all workers, i.e. if it has finished its merging job
316 
317 Bool_t TMergerInfo::AreAllWorkersMerged()
318 {
319  return (fWorkersToMerge == fMergedWorkers);
320 }
321 
322 ////////////////////////////////////////////////////////////////////////////////
323 /// Return if the determined number of workers has been already assigned to this merger
324 
325 Bool_t TMergerInfo::AreAllWorkersAssigned()
326 {
327  if (!fWorkers)
328  return kFALSE;
329 
330  return (fWorkers->GetSize() == fWorkersToMerge);
331 }
332 
333 ////////////////////////////////////////////////////////////////////////////////
334 /// This a private API function.
335 /// It checks whether the connection string contains a PoD cluster protocol.
336 /// If it does, then the connection string will be changed to reflect
337 /// a real PROOF connection string for a PROOF cluster managed by PoD.
338 /// PoD: http://pod.gsi.de .
339 /// Return -1 if the PoD request failed; return 0 otherwise.
340 
341 static Int_t PoDCheckUrl(TString *_cluster)
342 {
343  if ( !_cluster )
344  return 0;
345 
346  // trim spaces from both sides of the string
347  *_cluster = _cluster->Strip( TString::kBoth );
348  // PoD protocol string
349  const TString pod_prot("pod");
350 
351  // URL test
352  // TODO: The URL test is to support remote PoD servers (not managed by pod-remote)
353  TUrl url( _cluster->Data() );
354  if( pod_prot.CompareTo(url.GetProtocol(), TString::kIgnoreCase) )
355  return 0;
356 
357  // PoD cluster is used
358  // call pod-info in a batch mode (-b).
359  // pod-info will find either a local PoD cluster or
360  // a remote one, manged by pod-remote.
361  *_cluster = gSystem->GetFromPipe("pod-info -c -b");
362  if( 0 == _cluster->Length() ) {
363  Error("PoDCheckUrl", "PoD server is not running");
364  return -1;
365  }
366  return 0;
367 }
368 
369 ////////////////////////////////////////////////////////////////////////////////
370 /// Create a PROOF environment. Starting PROOF involves either connecting
371 /// to a master server, which in turn will start a set of slave servers, or
372 /// directly starting as master server (if master = ""). Masterurl is of
373 /// the form: [proof[s]://]host[:port]. Conffile is the name of the config
374 /// file describing the remote PROOF cluster (this argument alows you to
375 /// describe different cluster configurations).
376 /// The default is proof.conf. Confdir is the directory where the config
377 /// file and other PROOF related files are (like motd and noproof files).
378 /// Loglevel is the log level (default = 1). User specified custom config
379 /// files will be first looked for in $HOME/.conffile.
380 
381 TProof::TProof(const char *masterurl, const char *conffile, const char *confdir,
382  Int_t loglevel, const char *alias, TProofMgr *mgr)
383  : fUrl(masterurl)
384 {
385  // Default initializations
386  InitMembers();
387 
388  // This may be needed during init
389  fManager = mgr;
390 
391  // Default server type
392  fServType = TProofMgr::kXProofd;
393 
394  // Default query mode
395  fQueryMode = kSync;
396 
397  // Parse the main URL, adjusting the missing fields and setting the relevant
398  // bits
399  ResetBit(TProof::kIsClient);
400  ResetBit(TProof::kIsMaster);
401 
402  // Protocol and Host
403  if (!masterurl || strlen(masterurl) <= 0) {
404  fUrl.SetProtocol("proof");
405  fUrl.SetHost("__master__");
406  } else if (!(strstr(masterurl, "://"))) {
407  fUrl.SetProtocol("proof");
408  }
409  // Port
410  if (fUrl.GetPort() == TUrl(" ").GetPort())
411  fUrl.SetPort(TUrl("proof:// ").GetPort());
412 
413  // Make sure to store the FQDN, so to get a solid reference for subsequent checks
414  if (!strcmp(fUrl.GetHost(), "__master__"))
415  fMaster = fUrl.GetHost();
416  else if (!strlen(fUrl.GetHost()))
417  fMaster = gSystem->GetHostByName(gSystem->HostName()).GetHostName();
418  else
419  fMaster = gSystem->GetHostByName(fUrl.GetHost()).GetHostName();
420 
421  // Server type
422  if (strlen(fUrl.GetOptions()) > 0) {
423  TString opts(fUrl.GetOptions());
424  if (!(strncmp(fUrl.GetOptions(),"std",3))) {
425  fServType = TProofMgr::kProofd;
426  opts.Remove(0,3);
427  fUrl.SetOptions(opts.Data());
428  } else if (!(strncmp(fUrl.GetOptions(),"lite",4))) {
429  fServType = TProofMgr::kProofLite;
430  opts.Remove(0,4);
431  fUrl.SetOptions(opts.Data());
432  }
433  }
434 
435  // Instance type
436  fMasterServ = kFALSE;
437  SetBit(TProof::kIsClient);
438  ResetBit(TProof::kIsMaster);
439  if (fMaster == "__master__") {
440  fMasterServ = kTRUE;
441  ResetBit(TProof::kIsClient);
442  SetBit(TProof::kIsMaster);
443  } else if (fMaster == "prooflite") {
444  // Client and master are merged
445  fMasterServ = kTRUE;
446  SetBit(TProof::kIsMaster);
447  }
448  // Flag that we are a client
449  if (TestBit(TProof::kIsClient))
450  if (!gSystem->Getenv("ROOTPROOFCLIENT")) gSystem->Setenv("ROOTPROOFCLIENT","");
451 
452  Init(masterurl, conffile, confdir, loglevel, alias);
453 
454  // If the user was not set, get it from the master
455  if (strlen(fUrl.GetUser()) <= 0) {
456  TString usr, emsg;
457  if (Exec("gProofServ->GetUser()", "0", kTRUE) == 0) {
458  TObjString *os = fMacroLog.GetLineWith("const char");
459  if (os) {
460  Ssiz_t fst = os->GetString().First('\"');
461  Ssiz_t lst = os->GetString().Last('\"');
462  usr = os->GetString()(fst+1, lst-fst-1);
463  } else {
464  emsg = "could not find 'const char *' string in macro log";
465  }
466  } else {
467  emsg = "could not retrieve user info";
468  }
469  if (!emsg.IsNull()) {
470  // Get user logon name
471  UserGroup_t *pw = gSystem->GetUserInfo();
472  if (pw) {
473  usr = pw->fUser;
474  delete pw;
475  }
476  Warning("TProof", "%s: using local default %s", emsg.Data(), usr.Data());
477  }
478  // Set the user name in the main URL
479  fUrl.SetUser(usr.Data());
480  }
481 
482  // If called by a manager, make sure it stays in last position
483  // for cleaning
484  if (mgr) {
485  R__LOCKGUARD(gROOTMutex);
486  gROOT->GetListOfSockets()->Remove(mgr);
487  gROOT->GetListOfSockets()->Add(mgr);
488  }
489 
490  // Old-style server type: we add this to the list and set the global pointer
491  if (IsProofd() || TestBit(TProof::kIsMaster))
492  if (!gROOT->GetListOfProofs()->FindObject(this))
493  gROOT->GetListOfProofs()->Add(this);
494 
495  // Still needed by the packetizers: needs to be changed
496  gProof = this;
497 }
498 
499 ////////////////////////////////////////////////////////////////////////////////
500 /// Protected constructor to be used by classes deriving from TProof
501 /// (they have to call Init themselves and override StartSlaves
502 /// appropriately).
503 ///
504 /// This constructor simply closes any previous gProof and sets gProof
505 /// to this instance.
506 
507 TProof::TProof() : fUrl(""), fServType(TProofMgr::kXProofd)
508 {
509  // Default initializations
510  InitMembers();
511 
512  if (!gROOT->GetListOfProofs()->FindObject(this))
513  gROOT->GetListOfProofs()->Add(this);
514 
515  gProof = this;
516 }
517 
518 ////////////////////////////////////////////////////////////////////////////////
519 /// Default initializations
520 
521 void TProof::InitMembers()
522 {
523  fValid = kFALSE;
524  fTty = kFALSE;
525  fRecvMessages = 0;
526  fSlaveInfo = 0;
527  fMasterServ = kFALSE;
528  fSendGroupView = kFALSE;
529  fIsPollingWorkers = kFALSE;
530  fLastPollWorkers_s = -1;
531  fActiveSlaves = 0;
532  fInactiveSlaves = 0;
533  fUniqueSlaves = 0;
534  fAllUniqueSlaves = 0;
535  fNonUniqueMasters = 0;
536  fActiveMonitor = 0;
537  fUniqueMonitor = 0;
538  fAllUniqueMonitor = 0;
539  fCurrentMonitor = 0;
540  fBytesRead = 0;
541  fRealTime = 0;
542  fCpuTime = 0;
543  fIntHandler = 0;
544  fProgressDialog = 0;
545  fProgressDialogStarted = kFALSE;
546  SetBit(kUseProgressDialog);
547  fPlayer = 0;
548  fFeedback = 0;
549  fChains = 0;
550  fDSet = 0;
551  fNotIdle = 0;
552  fSync = kTRUE;
553  fRunStatus = kRunning;
554  fIsWaiting = kFALSE;
555  fRedirLog = kFALSE;
556  fLogFileW = 0;
557  fLogFileR = 0;
558  fLogToWindowOnly = kFALSE;
559  fSaveLogToMacro = kFALSE;
560  fMacroLog.SetName("ProofLogMacro");
561 
562  fWaitingSlaves = 0;
563  fQueries = 0;
564  fOtherQueries = 0;
565  fDrawQueries = 0;
566  fMaxDrawQueries = 1;
567  fSeqNum = 0;
568 
569  fSessionID = -1;
570  fEndMaster = kFALSE;
571 
572  fPackMgr = 0;
573  fEnabledPackagesOnCluster = 0;
574 
575  fInputData = 0;
576 
577  fPrintProgress = 0;
578 
579  fLoadedMacros = 0;
580 
581  fProtocol = -1;
582  fSlaves = 0;
583  fTerminatedSlaveInfos = 0;
584  fBadSlaves = 0;
585  fAllMonitor = 0;
586  fDataReady = kFALSE;
587  fBytesReady = 0;
588  fTotalBytes = 0;
589  fAvailablePackages = 0;
590  fEnabledPackages = 0;
591  fRunningDSets = 0;
592 
593  fCollectTimeout = -1;
594 
595  fManager = 0;
596  fQueryMode = kSync;
597  fDynamicStartup = kFALSE;
598 
599  fMergersSet = kFALSE;
600  fMergersByHost = kFALSE;
601  fMergers = 0;
602  fMergersCount = -1;
603  fLastAssignedMerger = 0;
604  fWorkersToMerge = 0;
605  fFinalizationRunning = kFALSE;
606 
607  fPerfTree = "";
608 
609  fWrksOutputReady = 0;
610 
611  fSelector = 0;
612 
613  fPrepTime = 0.;
614 
615  // Check if the user defined a list of environment variables to send over:
616  // include them into the dedicated list
617  if (gSystem->Getenv("PROOF_ENVVARS")) {
618  TString envs(gSystem->Getenv("PROOF_ENVVARS")), env, envsfound;
619  Int_t from = 0;
620  while (envs.Tokenize(env, from, ",")) {
621  if (!env.IsNull()) {
622  if (!gSystem->Getenv(env)) {
623  Warning("Init", "request for sending over undefined environemnt variable '%s' - ignoring", env.Data());
624  } else {
625  if (!envsfound.IsNull()) envsfound += ",";
626  envsfound += env;
627  TProof::DelEnvVar(env);
628  TProof::AddEnvVar(env, gSystem->Getenv(env));
629  }
630  }
631  }
632  if (envsfound.IsNull()) {
633  Warning("Init", "none of the requested env variables were found: '%s'", envs.Data());
634  } else {
635  Info("Init", "the following environment variables have been added to the list to be sent to the nodes: '%s'", envsfound.Data());
636  }
637  }
638 
639  // Done
640  return;
641 }
642 
643 ////////////////////////////////////////////////////////////////////////////////
644 /// Clean up PROOF environment.
645 
646 TProof::~TProof()
647 {
648  if (fChains) {
649  while (TChain *chain = dynamic_cast<TChain*> (fChains->First()) ) {
650  // remove "chain" from list
651  chain->SetProof(0);
652  RemoveChain(chain);
653  }
654  }
655 
656  // remove links to packages enabled on the client
657  if (TestBit(TProof::kIsClient)) {
658  // iterate over all packages
659  TList *epl = fPackMgr->GetListOfEnabled();
660  TIter nxp(epl);
661  while (TObjString *pck = (TObjString *)(nxp())) {
662  FileStat_t stat;
663  if (gSystem->GetPathInfo(pck->String(), stat) == 0) {
664  // check if symlink, if so unlink
665  // NOTE: GetPathInfo() returns 1 in case of symlink that does not point to
666  // existing file or to a directory, but if fIsLink is true the symlink exists
667  if (stat.fIsLink)
668  gSystem->Unlink(pck->String());
669  }
670  }
671  }
672 
673  Close();
674  SafeDelete(fIntHandler);
675  SafeDelete(fSlaves);
676  SafeDelete(fActiveSlaves);
677  SafeDelete(fInactiveSlaves);
678  SafeDelete(fUniqueSlaves);
679  SafeDelete(fAllUniqueSlaves);
680  SafeDelete(fNonUniqueMasters);
681  SafeDelete(fTerminatedSlaveInfos);
682  SafeDelete(fBadSlaves);
683  SafeDelete(fAllMonitor);
684  SafeDelete(fActiveMonitor);
685  SafeDelete(fUniqueMonitor);
686  SafeDelete(fAllUniqueMonitor);
687  SafeDelete(fSlaveInfo);
688  SafeDelete(fChains);
689  SafeDelete(fPlayer);
690  SafeDelete(fFeedback);
691  SafeDelete(fWaitingSlaves);
692  SafeDelete(fAvailablePackages);
693  SafeDelete(fEnabledPackages);
694  SafeDelete(fLoadedMacros);
695  SafeDelete(fPackMgr);
696  SafeDelete(fRecvMessages);
697  SafeDelete(fInputData);
698  SafeDelete(fRunningDSets);
699  if (fWrksOutputReady) {
700  fWrksOutputReady->SetOwner(kFALSE);
701  delete fWrksOutputReady;
702  }
703 
704  // remove file with redirected logs
705  if (TestBit(TProof::kIsClient)) {
706  if (fLogFileR)
707  fclose(fLogFileR);
708  if (fLogFileW)
709  fclose(fLogFileW);
710  if (fLogFileName.Length() > 0)
711  gSystem->Unlink(fLogFileName);
712  }
713 
714  // Remove for the global list
715  gROOT->GetListOfProofs()->Remove(this);
716  // ... and from the manager list
717  if (fManager && fManager->IsValid())
718  fManager->DiscardSession(this);
719 
720  if (gProof && gProof == this) {
721  // Set previous as default
722  TIter pvp(gROOT->GetListOfProofs(), kIterBackward);
723  while ((gProof = (TProof *)pvp())) {
724  if (gProof->InheritsFrom(TProof::Class()))
725  break;
726  }
727  }
728 
729  // For those interested in our destruction ...
730  Emit("~TProof()");
731  Emit("CloseWindow()");
732 }
733 
734 ////////////////////////////////////////////////////////////////////////////////
735 /// Start the PROOF environment. Starting PROOF involves either connecting
736 /// to a master server, which in turn will start a set of slave servers, or
737 /// directly starting as master server (if master = ""). For a description
738 /// of the arguments see the TProof ctor. Returns the number of started
739 /// master or slave servers, returns 0 in case of error, in which case
740 /// fValid remains false.
741 
742 Int_t TProof::Init(const char *, const char *conffile,
743  const char *confdir, Int_t loglevel, const char *alias)
744 {
745  R__ASSERT(gSystem);
746 
747  fValid = kFALSE;
748 
749  // Connected to terminal?
750  fTty = (isatty(0) == 0 || isatty(1) == 0) ? kFALSE : kTRUE;
751 
752  // If in attach mode, options is filled with additional info
753  Bool_t attach = kFALSE;
754  if (strlen(fUrl.GetOptions()) > 0) {
755  attach = kTRUE;
756  // A flag from the GUI
757  TString opts = fUrl.GetOptions();
758  if (opts.Contains("GUI")) {
759  SetBit(TProof::kUsingSessionGui);
760  opts.Remove(opts.Index("GUI"));
761  fUrl.SetOptions(opts);
762  }
763  }
764 
765  if (TestBit(TProof::kIsMaster)) {
766  // Fill default conf file and conf dir
767  if (!conffile || !conffile[0])
768  fConfFile = kPROOF_ConfFile;
769  if (!confdir || !confdir[0])
770  fConfDir = kPROOF_ConfDir;
771  // The group; the client receives it in the kPROOF_SESSIONTAG message
772  if (gProofServ) fGroup = gProofServ->GetGroup();
773  } else {
774  fConfDir = confdir;
775  fConfFile = conffile;
776  }
777 
778  // Analysise the conffile field
779  if (fConfFile.Contains("workers=0")) fConfFile.ReplaceAll("workers=0", "masteronly");
780  ParseConfigField(fConfFile);
781 
782  fWorkDir = gSystem->WorkingDirectory();
783  fLogLevel = loglevel;
784  fProtocol = kPROOF_Protocol;
785  fSendGroupView = kTRUE;
786  fImage = fMasterServ ? "" : "<local>";
787  fIntHandler = 0;
788  fStatus = 0;
789  fRecvMessages = new TList;
790  fRecvMessages->SetOwner(kTRUE);
791  fSlaveInfo = 0;
792  fChains = new TList;
793  fAvailablePackages = 0;
794  fEnabledPackages = 0;
795  fRunningDSets = 0;
796  fEndMaster = TestBit(TProof::kIsMaster) ? kTRUE : kFALSE;
797  fInputData = 0;
798  ResetBit(TProof::kNewInputData);
799  fPrintProgress = 0;
800 
801  // Timeout for some collect actions
802  fCollectTimeout = gEnv->GetValue("Proof.CollectTimeout", -1);
803 
804  // Should the workers be started dynamically; default: no
805  fDynamicStartup = gEnv->GetValue("Proof.DynamicStartup", kFALSE);
806 
807  // Default entry point for the data pool is the master
808  if (TestBit(TProof::kIsClient))
809  fDataPoolUrl.Form("root://%s", fMaster.Data());
810  else
811  fDataPoolUrl = "";
812 
813  fProgressDialog = 0;
814  fProgressDialogStarted = kFALSE;
815 
816  // Default alias is the master name
817  TString al = (alias) ? alias : fMaster.Data();
818  SetAlias(al);
819 
820  // Client logging of messages from the master and slaves
821  fRedirLog = kFALSE;
822  if (TestBit(TProof::kIsClient)) {
823  fLogFileName.Form("%s/ProofLog_%d", gSystem->TempDirectory(), gSystem->GetPid());
824  if ((fLogFileW = fopen(fLogFileName, "w")) == 0)
825  Error("Init", "could not create temporary logfile");
826  if ((fLogFileR = fopen(fLogFileName, "r")) == 0)
827  Error("Init", "could not open temp logfile for reading");
828  }
829  fLogToWindowOnly = kFALSE;
830 
831  // Status of cluster
832  fNotIdle = 0;
833  // Query type
834  fSync = (attach) ? kFALSE : kTRUE;
835  // Not enqueued
836  fIsWaiting = kFALSE;
837 
838  // Counters
839  fBytesRead = 0;
840  fRealTime = 0;
841  fCpuTime = 0;
842 
843  // List of queries
844  fQueries = 0;
845  fOtherQueries = 0;
846  fDrawQueries = 0;
847  fMaxDrawQueries = 1;
848  fSeqNum = 0;
849 
850  // Remote ID of the session
851  fSessionID = -1;
852 
853  // Part of active query
854  fWaitingSlaves = 0;
855 
856  // Make remote PROOF player
857  fPlayer = 0;
858  MakePlayer();
859 
860  fFeedback = new TList;
861  fFeedback->SetOwner();
862  fFeedback->SetName("FeedbackList");
863  AddInput(fFeedback);
864 
865  // sort slaves by descending performance index
866  fSlaves = new TSortedList(kSortDescending);
867  fActiveSlaves = new TList;
868  fInactiveSlaves = new TList;
869  fUniqueSlaves = new TList;
870  fAllUniqueSlaves = new TList;
871  fNonUniqueMasters = new TList;
872  fBadSlaves = new TList;
873  fAllMonitor = new TMonitor;
874  fActiveMonitor = new TMonitor;
875  fUniqueMonitor = new TMonitor;
876  fAllUniqueMonitor = new TMonitor;
877  fCurrentMonitor = 0;
878 
879  fTerminatedSlaveInfos = new TList;
880  fTerminatedSlaveInfos->SetOwner(kTRUE);
881 
882  fLoadedMacros = 0;
883  fPackMgr = 0;
884 
885  // Enable optimized sending of streamer infos to use embedded backward/forward
886  // compatibility support between different ROOT versions and different versions of
887  // users classes
888  Bool_t enableSchemaEvolution = gEnv->GetValue("Proof.SchemaEvolution",1);
889  if (enableSchemaEvolution) {
890  TMessage::EnableSchemaEvolutionForAll();
891  } else {
892  Info("TProof", "automatic schema evolution in TMessage explicitly disabled");
893  }
894 
895  if (IsMaster()) {
896  // to make UploadPackage() method work on the master as well.
897  if (gProofServ) fPackMgr = gProofServ->GetPackMgr();
898  } else {
899 
900  TString sandbox;
901  if (GetSandbox(sandbox, kTRUE) != 0) {
902  Error("Init", "failure asserting sandbox directory %s", sandbox.Data());
903  return 0;
904  }
905 
906  // Package Dir
907  TString packdir = gEnv->GetValue("Proof.PackageDir", "");
908  if (packdir.IsNull())
909  packdir.Form("%s/%s", sandbox.Data(), kPROOF_PackDir);
910  if (AssertPath(packdir, kTRUE) != 0) {
911  Error("Init", "failure asserting directory %s", packdir.Data());
912  return 0;
913  }
914  fPackMgr = new TPackMgr(packdir);
915  if (gDebug > 0)
916  Info("Init", "package directory set to %s", packdir.Data());
917  }
918 
919  if (!IsMaster()) {
920  // List of directories where to look for global packages
921  TString globpack = gEnv->GetValue("Proof.GlobalPackageDirs","");
922  TProofServ::ResolveKeywords(globpack);
923  Int_t nglb = TPackMgr::RegisterGlobalPath(globpack);
924  if (gDebug > 0)
925  Info("Init", " %d global package directories registered", nglb);
926  }
927 
928  // Master may want dynamic startup
929  if (fDynamicStartup) {
930  if (!IsMaster()) {
931  // If on client - start the master
932  if (!StartSlaves(attach))
933  return 0;
934  }
935  } else {
936 
937  // Master Only mode (for operations requiring only the master, e.g. dataset browsing,
938  // result retrieving, ...)
939  Bool_t masterOnly = gEnv->GetValue("Proof.MasterOnly", kFALSE);
940  if (!IsMaster() || !masterOnly) {
941  // Start slaves (the old, static, per-session way)
942  if (!StartSlaves(attach))
943  return 0;
944  // Client: Is Master in dynamic startup mode?
945  if (!IsMaster()) {
946  Int_t dyn = 0;
947  GetRC("Proof.DynamicStartup", dyn);
948  if (dyn != 0) fDynamicStartup = kTRUE;
949  }
950  }
951  }
952  // we are now properly initialized
953  fValid = kTRUE;
954 
955  // De-activate monitor (will be activated in Collect)
956  fAllMonitor->DeActivateAll();
957 
958  // By default go into parallel mode
959  Int_t nwrk = GetRemoteProtocol() > 35 ? -1 : 9999;
960  TNamed *n = 0;
961  if (TProof::GetEnvVars() &&
962  (n = (TNamed *) TProof::GetEnvVars()->FindObject("PROOF_NWORKERS"))) {
963  TString s(n->GetTitle());
964  if (s.IsDigit()) nwrk = s.Atoi();
965  }
966  GoParallel(nwrk, attach);
967 
968  // Send relevant initial state to slaves
969  if (!attach)
970  SendInitialState();
971  else if (!IsIdle())
972  // redirect log
973  fRedirLog = kTRUE;
974 
975  // Done at this point, the alias will be communicated to the coordinator, if any
976  if (TestBit(TProof::kIsClient))
977  SetAlias(al);
978 
979  SetActive(kFALSE);
980 
981  if (IsValid()) {
982 
983  // Activate input handler
984  ActivateAsyncInput();
985 
986  R__LOCKGUARD(gROOTMutex);
987  gROOT->GetListOfSockets()->Add(this);
988  }
989 
990  AskParallel();
991 
992  return fActiveSlaves->GetSize();
993 }
994 
995 ////////////////////////////////////////////////////////////////////////////////
996 /// Set the sandbox path from ' Proof.Sandbox' or the alternative var 'rc'.
997 /// Use the existing setting or the default if nothing is found.
998 /// If 'assert' is kTRUE, make also sure that the path exists.
999 /// Return 0 on success, -1 on failure
1000 
1001 Int_t TProof::GetSandbox(TString &sb, Bool_t assert, const char *rc)
1002 {
1003  // Get it from 'rc', if defined
1004  if (rc && strlen(rc)) sb = gEnv->GetValue(rc, sb);
1005  // Or use the default 'rc'
1006  if (sb.IsNull()) sb = gEnv->GetValue("Proof.Sandbox", "");
1007  // If nothing found , use the default
1008  if (sb.IsNull()) sb.Form("~/%s", kPROOF_WorkDir);
1009  // Expand special settings
1010  if (sb == ".") {
1011  sb = gSystem->pwd();
1012  } else if (sb == "..") {
1013  sb = gSystem->DirName(gSystem->pwd());
1014  }
1015  gSystem->ExpandPathName(sb);
1016 
1017  // Assert the path, if required
1018  if (assert && AssertPath(sb, kTRUE) != 0) return -1;
1019  // Done
1020  return 0;
1021 }
1022 
1023 ////////////////////////////////////////////////////////////////////////////////
1024 /// The config file field may contain special instructions which need to be
1025 /// parsed at the beginning, e.g. for debug runs with valgrind.
1026 /// Several options can be given separated by a ','
1027 
1028 void TProof::ParseConfigField(const char *config)
1029 {
1030  TString sconf(config), opt;
1031  Ssiz_t from = 0;
1032  Bool_t cpuPin = kFALSE;
1033 
1034  // Analysise the field
1035  const char *cq = (IsLite()) ? "\"" : "";
1036  while (sconf.Tokenize(opt, from, ",")) {
1037  if (opt.IsNull()) continue;
1038 
1039  if (opt.BeginsWith("valgrind")) {
1040  // Any existing valgrind setting? User can give full settings, which we fully respect,
1041  // or pass additional options for valgrind by prefixing 'valgrind_opts:'. For example,
1042  // TProof::AddEnvVar("PROOF_MASTER_WRAPPERCMD", "valgrind_opts:--time-stamp --leak-check=full"
1043  // will add option "--time-stamp --leak-check=full" to our default options
1044  TString mst, top, sub, wrk, all;
1045  TList *envs = fgProofEnvList;
1046  TNamed *n = 0;
1047  if (envs) {
1048  if ((n = (TNamed *) envs->FindObject("PROOF_WRAPPERCMD")))
1049  all = n->GetTitle();
1050  if ((n = (TNamed *) envs->FindObject("PROOF_MASTER_WRAPPERCMD")))
1051  mst = n->GetTitle();
1052  if ((n = (TNamed *) envs->FindObject("PROOF_TOPMASTER_WRAPPERCMD")))
1053  top = n->GetTitle();
1054  if ((n = (TNamed *) envs->FindObject("PROOF_SUBMASTER_WRAPPERCMD")))
1055  sub = n->GetTitle();
1056  if ((n = (TNamed *) envs->FindObject("PROOF_SLAVE_WRAPPERCMD")))
1057  wrk = n->GetTitle();
1058  }
1059  if (all != "" && mst == "") mst = all;
1060  if (all != "" && top == "") top = all;
1061  if (all != "" && sub == "") sub = all;
1062  if (all != "" && wrk == "") wrk = all;
1063  if (all != "" && all.BeginsWith("valgrind_opts:")) {
1064  // The field is used to add an option Reset the setting
1065  Info("ParseConfigField","valgrind run: resetting 'PROOF_WRAPPERCMD':"
1066  " must be set again for next run , if any");
1067  TProof::DelEnvVar("PROOF_WRAPPERCMD");
1068  }
1069  TString var, cmd;
1070  cmd.Form("%svalgrind -v --suppressions=<rootsys>/etc/valgrind-root.supp", cq);
1071  TString mstlab("NO"), wrklab("NO");
1072  Bool_t doMaster = (opt == "valgrind" || (opt.Contains("master") &&
1073  !opt.Contains("topmaster") && !opt.Contains("submaster")))
1074  ? kTRUE : kFALSE;
1075  if (doMaster) {
1076  if (!IsLite()) {
1077  // Check if we have to add a var
1078  if (mst == "" || mst.BeginsWith("valgrind_opts:")) {
1079  mst.ReplaceAll("valgrind_opts:","");
1080  var.Form("%s --log-file=<logfilemst>.valgrind.log %s", cmd.Data(), mst.Data());
1081  TProof::AddEnvVar("PROOF_MASTER_WRAPPERCMD", var);
1082  mstlab = "YES";
1083  } else if (mst != "") {
1084  mstlab = "YES";
1085  }
1086  } else {
1087  if (opt.Contains("master")) {
1088  Warning("ParseConfigField",
1089  "master valgrinding does not make sense for PROOF-Lite: ignoring");
1090  opt.ReplaceAll("master", "");
1091  if (!opt.Contains("workers")) return;
1092  }
1093  if (opt == "valgrind" || opt == "valgrind=") opt = "valgrind=workers";
1094  }
1095  }
1096  if (opt.Contains("topmaster")) {
1097  // Check if we have to add a var
1098  if (top == "" || top.BeginsWith("valgrind_opts:")) {
1099  top.ReplaceAll("valgrind_opts:","");
1100  var.Form("%s --log-file=<logfilemst>.valgrind.log %s", cmd.Data(), top.Data());
1101  TProof::AddEnvVar("PROOF_TOPMASTER_WRAPPERCMD", var);
1102  mstlab = "YES";
1103  } else if (top != "") {
1104  mstlab = "YES";
1105  }
1106  }
1107  if (opt.Contains("submaster")) {
1108  // Check if we have to add a var
1109  if (sub == "" || sub.BeginsWith("valgrind_opts:")) {
1110  sub.ReplaceAll("valgrind_opts:","");
1111  var.Form("%s --log-file=<logfilemst>.valgrind.log %s", cmd.Data(), sub.Data());
1112  TProof::AddEnvVar("PROOF_SUBMASTER_WRAPPERCMD", var);
1113  mstlab = "YES";
1114  } else if (sub != "") {
1115  mstlab = "YES";
1116  }
1117  }
1118  if (opt.Contains("=workers") || opt.Contains("+workers")) {
1119  // Check if we have to add a var
1120  if (wrk == "" || wrk.BeginsWith("valgrind_opts:")) {
1121  wrk.ReplaceAll("valgrind_opts:","");
1122  var.Form("%s --log-file=<logfilewrk>.__valgrind__.log %s%s", cmd.Data(), wrk.Data(), cq);
1123  TProof::AddEnvVar("PROOF_SLAVE_WRAPPERCMD", var);
1124  TString nwrks("2");
1125  Int_t inw = opt.Index('#');
1126  if (inw != kNPOS) {
1127  nwrks = opt(inw+1, opt.Length());
1128  if (!nwrks.IsDigit()) nwrks = "2";
1129  }
1130  // Set the relevant variables
1131  if (!IsLite()) {
1132  TProof::AddEnvVar("PROOF_NWORKERS", nwrks);
1133  } else {
1134  gEnv->SetValue("ProofLite.Workers", nwrks.Atoi());
1135  }
1136  wrklab = nwrks;
1137  // Register the additional worker log in the session file
1138  // (for the master this is done automatically)
1139  TProof::AddEnvVar("PROOF_ADDITIONALLOG", "__valgrind__.log*");
1140  } else if (wrk != "") {
1141  wrklab = "ALL";
1142  }
1143  }
1144  // Increase the relevant timeouts
1145  if (!IsLite()) {
1146  TProof::AddEnvVar("PROOF_INTWAIT", "5000");
1147  gEnv->SetValue("Proof.SocketActivityTimeout", 6000);
1148  } else {
1149  gEnv->SetValue("ProofLite.StartupTimeOut", 5000);
1150  }
1151  // Warn for slowness
1152  Printf(" ");
1153  if (!IsLite()) {
1154  Printf(" ---> Starting a debug run with valgrind (master:%s, workers:%s)", mstlab.Data(), wrklab.Data());
1155  } else {
1156  Printf(" ---> Starting a debug run with valgrind (workers:%s)", wrklab.Data());
1157  }
1158  Printf(" ---> Please be patient: startup may be VERY slow ...");
1159  Printf(" ---> Logs will be available as special tags in the log window (from the progress dialog or TProof::LogViewer()) ");
1160  Printf(" ---> (Reminder: this debug run makes sense only if you are running a debug version of ROOT)");
1161  Printf(" ");
1162 
1163  } else if (opt.BeginsWith("igprof-pp")) {
1164 
1165  // IgProf profiling on master and worker. PROOF does not set the
1166  // environment for you: proper environment variables (like PATH and
1167  // LD_LIBRARY_PATH) should be set externally
1168 
1169  Printf("*** Requested IgProf performance profiling ***");
1170  TString addLogExt = "__igprof.pp__.log";
1171  TString addLogFmt = "igprof -pk -pp -t proofserv.exe -o %s.%s";
1172  TString tmp;
1173 
1174  if (IsLite()) {
1175  addLogFmt.Append("\"");
1176  addLogFmt.Prepend("\"");
1177  }
1178 
1179  tmp.Form(addLogFmt.Data(), "<logfilemst>", addLogExt.Data());
1180  TProof::AddEnvVar("PROOF_MASTER_WRAPPERCMD", tmp.Data());
1181 
1182  tmp.Form(addLogFmt.Data(), "<logfilewrk>", addLogExt.Data());
1183  TProof::AddEnvVar("PROOF_SLAVE_WRAPPERCMD", tmp.Data() );
1184 
1185  TProof::AddEnvVar("PROOF_ADDITIONALLOG", addLogExt.Data());
1186 
1187  } else if (opt.BeginsWith("cpupin=")) {
1188  // Enable CPU pinning. Takes as argument the list of processor IDs
1189  // that will be used in order. Processor IDs are numbered from 0,
1190  // use likwid to see how they are organized. A possible parameter
1191  // format would be:
1192  //
1193  // cpupin=3+4+0+9+10+22+7
1194  //
1195  // Only the specified processor IDs will be used in a round-robin
1196  // fashion, dealing with the fact that you can request more workers
1197  // than the number of processor IDs you have specified.
1198  //
1199  // To use all available processors in their order:
1200  //
1201  // cpupin=*
1202 
1203  opt.Remove(0, 7);
1204 
1205  // Remove any char which is neither a number nor a plus '+'
1206  for (Ssiz_t i=0; i<opt.Length(); i++) {
1207  Char_t c = opt[i];
1208  if ((c != '+') && ((c < '0') || (c > '9')))
1209  opt[i] = '_';
1210  }
1211  opt.ReplaceAll("_", "");
1212  TProof::AddEnvVar("PROOF_SLAVE_CPUPIN_ORDER", opt);
1213  cpuPin = kTRUE;
1214  } else if (opt.BeginsWith("workers=")) {
1215 
1216  // Request for a given number of workers (within the max) or worker
1217  // startup combination:
1218  // workers=5 start max 5 workers (or less, if less are assigned)
1219  // workers=2x start max 2 workers per node (or less, if less are assigned)
1220  opt.ReplaceAll("workers=","");
1221  TProof::AddEnvVar("PROOF_NWORKERS", opt);
1222  }
1223  }
1224 
1225  // In case of PROOF-Lite, enable CPU pinning when requested (Linux only)
1226  #ifdef R__LINUX
1227  if (IsLite() && cpuPin) {
1228  Printf("*** Requested CPU pinning ***");
1229  const TList *ev = GetEnvVars();
1230  const char *pinCmd = "taskset -c <cpupin>";
1231  TString val;
1232  TNamed *p;
1233  if (ev && (p = dynamic_cast<TNamed *>(ev->FindObject("PROOF_SLAVE_WRAPPERCMD")))) {
1234  val = p->GetTitle();
1235  val.Insert(val.Length()-1, " ");
1236  val.Insert(val.Length()-1, pinCmd);
1237  }
1238  else {
1239  val.Form("\"%s\"", pinCmd);
1240  }
1241  TProof::AddEnvVar("PROOF_SLAVE_WRAPPERCMD", val.Data());
1242  }
1243  #endif
1244 }
1245 
1246 ////////////////////////////////////////////////////////////////////////////////
1247 /// Make sure that 'path' exists; if 'writable' is kTRUE, make also sure
1248 /// that the path is writable
1249 
1250 Int_t TProof::AssertPath(const char *inpath, Bool_t writable)
1251 {
1252  if (!inpath || strlen(inpath) <= 0) {
1253  Error("AssertPath", "undefined input path");
1254  return -1;
1255  }
1256 
1257  TString path(inpath);
1258  gSystem->ExpandPathName(path);
1259 
1260  if (gSystem->AccessPathName(path, kFileExists)) {
1261  if (gSystem->mkdir(path, kTRUE) != 0) {
1262  Error("AssertPath", "could not create path %s", path.Data());
1263  return -1;
1264  }
1265  }
1266  // It must be writable
1267  if (gSystem->AccessPathName(path, kWritePermission) && writable) {
1268  if (gSystem->Chmod(path, 0666) != 0) {
1269  Error("AssertPath", "could not make path %s writable", path.Data());
1270  return -1;
1271  }
1272  }
1273 
1274  // Done
1275  return 0;
1276 }
1277 
1278 ////////////////////////////////////////////////////////////////////////////////
1279 /// Set manager and schedule its destruction after this for clean
1280 /// operations.
1281 
1282 void TProof::SetManager(TProofMgr *mgr)
1283 {
1284  fManager = mgr;
1285 
1286  if (mgr) {
1287  R__LOCKGUARD(gROOTMutex);
1288  gROOT->GetListOfSockets()->Remove(mgr);
1289  gROOT->GetListOfSockets()->Add(mgr);
1290  }
1291 }
1292 
1293 ////////////////////////////////////////////////////////////////////////////////
1294 /// Works on the master node only.
1295 /// It starts workers on the machines in workerList and sets the paths,
1296 /// packages and macros as on the master.
1297 /// It is a subbstitute for StartSlaves(...)
1298 /// The code is mostly the master part of StartSlaves,
1299 /// with the parallel startup removed.
1300 
1301 Int_t TProof::AddWorkers(TList *workerList)
1302 {
1303  if (!IsMaster()) {
1304  Error("AddWorkers", "AddWorkers can only be called on the master!");
1305  return -1;
1306  }
1307 
1308  if (!workerList || !(workerList->GetSize())) {
1309  Error("AddWorkers", "empty list of workers!");
1310  return -2;
1311  }
1312 
1313  // Code taken from master part of StartSlaves with the parllel part removed
1314 
1315  fImage = gProofServ->GetImage();
1316  if (fImage.IsNull())
1317  fImage.Form("%s:%s", TUrl(gSystem->HostName()).GetHostFQDN(), gProofServ->GetWorkDir());
1318 
1319  // Get all workers
1320  UInt_t nSlaves = workerList->GetSize();
1321  UInt_t nSlavesDone = 0;
1322  Int_t ord = 0;
1323 
1324  // Loop over all new workers and start them (if we had already workers it means we are
1325  // increasing parallelism or that is not the first time we are called)
1326  Bool_t goMoreParallel = (fSlaves->GetEntries() > 0) ? kTRUE : kFALSE;
1327 
1328  // A list of TSlave objects for workers that are being added
1329  TList *addedWorkers = new TList();
1330  if (!addedWorkers) {
1331  // This is needed to silence Coverity ...
1332  Error("AddWorkers", "cannot create new list for the workers to be added");
1333  return -2;
1334  }
1335  addedWorkers->SetOwner(kFALSE);
1336  TListIter next(workerList);
1337  TObject *to;
1338  TProofNodeInfo *worker;
1339  TSlaveInfo *dummysi = new TSlaveInfo();
1340  while ((to = next())) {
1341  // Get the next worker from the list
1342  worker = (TProofNodeInfo *)to;
1343 
1344  // Read back worker node info
1345  const Char_t *image = worker->GetImage().Data();
1346  const Char_t *workdir = worker->GetWorkDir().Data();
1347  Int_t perfidx = worker->GetPerfIndex();
1348  Int_t sport = worker->GetPort();
1349  if (sport == -1)
1350  sport = fUrl.GetPort();
1351 
1352  // Create worker server
1353  TString fullord;
1354  if (worker->GetOrdinal().Length() > 0) {
1355  fullord.Form("%s.%s", gProofServ->GetOrdinal(), worker->GetOrdinal().Data());
1356  } else {
1357  fullord.Form("%s.%d", gProofServ->GetOrdinal(), ord);
1358  }
1359 
1360  // Remove worker from the list of workers terminated gracefully
1361  dummysi->SetOrdinal(fullord);
1362  TSlaveInfo *rmsi = (TSlaveInfo *)fTerminatedSlaveInfos->Remove(dummysi);
1363  SafeDelete(rmsi);
1364 
1365  // Create worker server
1366  TString wn(worker->GetNodeName());
1367  if (wn == "localhost" || wn.BeginsWith("localhost.")) wn = gSystem->HostName();
1368  TUrl u(TString::Format("%s:%d", wn.Data(), sport));
1369  // Add group info in the password firdl, if any
1370  if (strlen(gProofServ->GetGroup()) > 0) {
1371  // Set also the user, otherwise the password is not exported
1372  if (strlen(u.GetUser()) <= 0)
1373  u.SetUser(gProofServ->GetUser());
1374  u.SetPasswd(gProofServ->GetGroup());
1375  }
1376  TSlave *slave = 0;
1377  if (worker->IsWorker()) {
1378  slave = CreateSlave(u.GetUrl(), fullord, perfidx, image, workdir);
1379  } else {
1380  slave = CreateSubmaster(u.GetUrl(), fullord,
1381  image, worker->GetMsd(), worker->GetNWrks());
1382  }
1383 
1384  // Add to global list (we will add to the monitor list after
1385  // finalizing the server startup)
1386  Bool_t slaveOk = kTRUE;
1387  fSlaves->Add(slave);
1388  if (slave->IsValid()) {
1389  addedWorkers->Add(slave);
1390  } else {
1391  slaveOk = kFALSE;
1392  fBadSlaves->Add(slave);
1393  Warning("AddWorkers", "worker '%s' is invalid", slave->GetOrdinal());
1394  }
1395 
1396  PDB(kGlobal,3)
1397  Info("AddWorkers", "worker on host %s created"
1398  " and added to list (ord: %s)", worker->GetName(), slave->GetOrdinal());
1399 
1400  // Notify opening of connection
1401  nSlavesDone++;
1402  TMessage m(kPROOF_SERVERSTARTED);
1403  m << TString("Opening connections to workers") << nSlaves
1404  << nSlavesDone << slaveOk;
1405  gProofServ->GetSocket()->Send(m);
1406 
1407  ord++;
1408  } //end of the worker loop
1409  SafeDelete(dummysi);
1410 
1411  // Cleanup
1412  SafeDelete(workerList);
1413 
1414  nSlavesDone = 0;
1415 
1416  // Here we finalize the server startup: in this way the bulk
1417  // of remote operations are almost parallelized
1418  TIter nxsl(addedWorkers);
1419  TSlave *sl = 0;
1420  while ((sl = (TSlave *) nxsl())) {
1421 
1422  // Finalize setup of the server
1423  if (sl->IsValid())
1424  sl->SetupServ(TSlave::kSlave, 0);
1425 
1426  // Monitor good slaves
1427  Bool_t slaveOk = kTRUE;
1428  if (sl->IsValid()) {
1429  fAllMonitor->Add(sl->GetSocket());
1430  PDB(kGlobal,3)
1431  Info("AddWorkers", "worker on host %s finalized"
1432  " and added to list", sl->GetOrdinal());
1433  } else {
1434  slaveOk = kFALSE;
1435  fBadSlaves->Add(sl);
1436  }
1437 
1438  // Notify end of startup operations
1439  nSlavesDone++;
1440  TMessage m(kPROOF_SERVERSTARTED);
1441  m << TString("Setting up worker servers") << nSlaves
1442  << nSlavesDone << slaveOk;
1443  gProofServ->GetSocket()->Send(m);
1444  }
1445 
1446  // Now set new state on the added workers (on all workers for simplicity)
1447  // use fEnabledPackages, fLoadedMacros,
1448  // gSystem->GetDynamicPath() and gSystem->GetIncludePath()
1449  // no need to load packages that are only loaded and not enabled (dyn mode)
1450  Int_t nwrk = GetRemoteProtocol() > 35 ? -1 : 9999;
1451  TNamed *n = 0;
1452  if (TProof::GetEnvVars() &&
1453  (n = (TNamed *) TProof::GetEnvVars()->FindObject("PROOF_NWORKERS"))) {
1454  TString s(n->GetTitle());
1455  if (s.IsDigit()) nwrk = s.Atoi();
1456  }
1457 
1458  if (fDynamicStartup && goMoreParallel) {
1459 
1460  PDB(kGlobal, 3)
1461  Info("AddWorkers", "will invoke GoMoreParallel()");
1462  Int_t nw = GoMoreParallel(nwrk);
1463  PDB(kGlobal, 3)
1464  Info("AddWorkers", "GoMoreParallel()=%d", nw);
1465 
1466  }
1467  else {
1468  // Not in Dynamic Workers mode
1469  PDB(kGlobal, 3)
1470  Info("AddWorkers", "will invoke GoParallel()");
1471  GoParallel(nwrk, kFALSE, 0);
1472  }
1473 
1474  // Set worker processing environment
1475  SetupWorkersEnv(addedWorkers, goMoreParallel);
1476 
1477  // Update list of current workers
1478  PDB(kGlobal, 3)
1479  Info("AddWorkers", "will invoke SaveWorkerInfo()");
1480  SaveWorkerInfo();
1481 
1482  // Inform the client that the number of workers has changed
1483  if (fDynamicStartup && gProofServ) {
1484  PDB(kGlobal, 3)
1485  Info("AddWorkers", "will invoke SendParallel()");
1486  gProofServ->SendParallel(kTRUE);
1487 
1488  if (goMoreParallel && fPlayer) {
1489  // In case we are adding workers dynamically to an existing process, we
1490  // should invoke a special player's Process() to set only added workers
1491  // to the proper state
1492  PDB(kGlobal, 3)
1493  Info("AddWorkers", "will send the PROCESS message to selected workers");
1494  fPlayer->JoinProcess(addedWorkers);
1495  // Update merger counters (new workers are not yet active)
1496  fMergePrg.SetNWrks(fActiveSlaves->GetSize() + addedWorkers->GetSize());
1497  }
1498  }
1499 
1500  // Cleanup
1501  delete addedWorkers;
1502 
1503  return 0;
1504 }
1505 
1506 ////////////////////////////////////////////////////////////////////////////////
1507 /// Set up packages, loaded macros, include and lib paths ...
1508 
1509 void TProof::SetupWorkersEnv(TList *addedWorkers, Bool_t increasingWorkers)
1510 {
1511  // Packages
1512  TList *packs = gProofServ ? gProofServ->GetEnabledPackages() : GetEnabledPackages();
1513  if (packs && packs->GetSize() > 0) {
1514  TIter nxp(packs);
1515  TPair *pck = 0;
1516  while ((pck = (TPair *) nxp())) {
1517  // Upload and Enable methods are intelligent and avoid
1518  // re-uploading or re-enabling of a package to a node that has it.
1519  if (fDynamicStartup && increasingWorkers) {
1520  // Upload only on added workers
1521  PDB(kGlobal, 3)
1522  Info("SetupWorkersEnv", "will invoke UploadPackage() and EnablePackage() on added workers");
1523  if (UploadPackage(pck->GetName(), kUntar, addedWorkers) >= 0)
1524  EnablePackage(pck->GetName(), (TList *) pck->Value(), kTRUE, addedWorkers);
1525  } else {
1526  PDB(kGlobal, 3)
1527  Info("SetupWorkersEnv", "will invoke UploadPackage() and EnablePackage() on all workers");
1528  if (UploadPackage(pck->GetName()) >= 0)
1529  EnablePackage(pck->GetName(), (TList *) pck->Value(), kTRUE);
1530  }
1531  }
1532  }
1533 
1534  // Loaded macros
1535  if (fLoadedMacros) {
1536  TIter nxp(fLoadedMacros);
1537  TObjString *os = 0;
1538  while ((os = (TObjString *) nxp())) {
1539  PDB(kGlobal, 3) {
1540  Info("SetupWorkersEnv", "will invoke Load() on selected workers");
1541  Printf("Loading a macro : %s", os->GetName());
1542  }
1543  Load(os->GetName(), kTRUE, kTRUE, addedWorkers);
1544  }
1545  }
1546 
1547  // Dynamic path
1548  TString dyn = gSystem->GetDynamicPath();
1549  dyn.ReplaceAll(":", " ");
1550  dyn.ReplaceAll("\"", " ");
1551  PDB(kGlobal, 3)
1552  Info("SetupWorkersEnv", "will invoke AddDynamicPath() on selected workers");
1553  AddDynamicPath(dyn, kFALSE, addedWorkers, kFALSE); // Do not Collect
1554 
1555  // Include path
1556  TString inc = gSystem->GetIncludePath();
1557  inc.ReplaceAll("-I", " ");
1558  inc.ReplaceAll("\"", " ");
1559  PDB(kGlobal, 3)
1560  Info("SetupWorkersEnv", "will invoke AddIncludePath() on selected workers");
1561  AddIncludePath(inc, kFALSE, addedWorkers, kFALSE); // Do not Collect
1562 
1563  // Done
1564  return;
1565 }
1566 
1567 ////////////////////////////////////////////////////////////////////////////////
1568 /// Used for shuting down the workres after a query is finished.
1569 /// Sends each of the workers from the workerList, a kPROOF_STOP message.
1570 /// If the workerList == 0, shutdown all the workers.
1571 
1572 Int_t TProof::RemoveWorkers(TList *workerList)
1573 {
1574  if (!IsMaster()) {
1575  Error("RemoveWorkers", "RemoveWorkers can only be called on the master!");
1576  return -1;
1577  }
1578 
1579  fFileMap.clear(); // This could be avoided if CopyFromCache was used in SendFile
1580 
1581  if (!workerList) {
1582  // shutdown all the workers
1583  TIter nxsl(fSlaves);
1584  TSlave *sl = 0;
1585  while ((sl = (TSlave *) nxsl())) {
1586  // Shut down the worker assumig that it is not processing
1587  TerminateWorker(sl);
1588  }
1589 
1590  } else {
1591  if (!(workerList->GetSize())) {
1592  Error("RemoveWorkers", "The list of workers should not be empty!");
1593  return -2;
1594  }
1595 
1596  // Loop over all the workers and stop them
1597  TListIter next(workerList);
1598  TObject *to;
1599  TProofNodeInfo *worker;
1600  while ((to = next())) {
1601  TSlave *sl = 0;
1602  if (!strcmp(to->ClassName(), "TProofNodeInfo")) {
1603  // Get the next worker from the list
1604  worker = (TProofNodeInfo *)to;
1605  TIter nxsl(fSlaves);
1606  while ((sl = (TSlave *) nxsl())) {
1607  // Shut down the worker assumig that it is not processing
1608  if (sl->GetName() == worker->GetNodeName())
1609  break;
1610  }
1611  } else if (to->InheritsFrom(TSlave::Class())) {
1612  sl = (TSlave *) to;
1613  } else {
1614  Warning("RemoveWorkers","unknown object type: %s - it should be"
1615  " TProofNodeInfo or inheriting from TSlave", to->ClassName());
1616  }
1617  // Shut down the worker assumig that it is not processing
1618  if (sl) {
1619  if (gDebug > 0)
1620  Info("RemoveWorkers","terminating worker %s", sl->GetOrdinal());
1621  TerminateWorker(sl);
1622  }
1623  }
1624  }
1625 
1626  // Update also the master counter
1627  if (gProofServ && fSlaves->GetSize() <= 0) gProofServ->ReleaseWorker("master");
1628 
1629  return 0;
1630 }
1631 
1632 ////////////////////////////////////////////////////////////////////////////////
1633 /// Start up PROOF slaves.
1634 
1635 Bool_t TProof::StartSlaves(Bool_t attach)
1636 {
1637  // If this is a master server, find the config file and start slave
1638  // servers as specified in the config file
1639  if (TestBit(TProof::kIsMaster)) {
1640 
1641  Int_t pc = 0;
1642  TList *workerList = new TList;
1643  // Get list of workers
1644  if (gProofServ->GetWorkers(workerList, pc) == TProofServ::kQueryStop) {
1645  TString emsg("no resource currently available for this session: please retry later");
1646  if (gDebug > 0) Info("StartSlaves", "%s", emsg.Data());
1647  gProofServ->SendAsynMessage(emsg.Data());
1648  return kFALSE;
1649  }
1650  // Setup the workers
1651  if (AddWorkers(workerList) < 0)
1652  return kFALSE;
1653 
1654  } else {
1655 
1656  // create master server
1657  Printf("Starting master: opening connection ...");
1658  TSlave *slave = CreateSubmaster(fUrl.GetUrl(), "0", "master", 0);
1659 
1660  if (slave->IsValid()) {
1661 
1662  // Notify
1663  fprintf(stderr,"Starting master:"
1664  " connection open: setting up server ... \r");
1665  StartupMessage("Connection to master opened", kTRUE, 1, 1);
1666 
1667  if (!attach) {
1668 
1669  // Set worker interrupt handler
1670  slave->SetInterruptHandler(kTRUE);
1671 
1672  // Finalize setup of the server
1673  slave->SetupServ(TSlave::kMaster, fConfFile);
1674 
1675  if (slave->IsValid()) {
1676 
1677  // Notify
1678  Printf("Starting master: OK ");
1679  StartupMessage("Master started", kTRUE, 1, 1);
1680 
1681  // check protocol compatibility
1682  // protocol 1 is not supported anymore
1683  if (fProtocol == 1) {
1684  Error("StartSlaves",
1685  "client and remote protocols not compatible (%d and %d)",
1686  kPROOF_Protocol, fProtocol);
1687  slave->Close("S");
1688  delete slave;
1689  return kFALSE;
1690  }
1691 
1692  fSlaves->Add(slave);
1693  fAllMonitor->Add(slave->GetSocket());
1694 
1695  // Unset worker interrupt handler
1696  slave->SetInterruptHandler(kFALSE);
1697 
1698  // Set interrupt PROOF handler from now on
1699  fIntHandler = new TProofInterruptHandler(this);
1700 
1701  // Give-up after 5 minutes
1702  Int_t rc = Collect(slave, 300);
1703  Int_t slStatus = slave->GetStatus();
1704  if (slStatus == -99 || slStatus == -98 || rc == 0) {
1705  fSlaves->Remove(slave);
1706  fAllMonitor->Remove(slave->GetSocket());
1707  if (slStatus == -99)
1708  Error("StartSlaves", "no resources available or problems setting up workers (check logs)");
1709  else if (slStatus == -98)
1710  Error("StartSlaves", "could not setup output redirection on master");
1711  else
1712  Error("StartSlaves", "setting up master");
1713  slave->Close("S");
1714  delete slave;
1715  return 0;
1716  }
1717 
1718  if (!slave->IsValid()) {
1719  fSlaves->Remove(slave);
1720  fAllMonitor->Remove(slave->GetSocket());
1721  slave->Close("S");
1722  delete slave;
1723  Error("StartSlaves",
1724  "failed to setup connection with PROOF master server");
1725  return kFALSE;
1726  }
1727 
1728  if (!gROOT->IsBatch() && TestBit(kUseProgressDialog)) {
1729  if ((fProgressDialog =
1730  gROOT->GetPluginManager()->FindHandler("TProofProgressDialog")))
1731  if (fProgressDialog->LoadPlugin() == -1)
1732  fProgressDialog = 0;
1733  }
1734  } else {
1735  // Notify
1736  Printf("Starting master: failure");
1737  }
1738  } else {
1739 
1740  // Notify
1741  Printf("Starting master: OK ");
1742  StartupMessage("Master attached", kTRUE, 1, 1);
1743 
1744  if (!gROOT->IsBatch() && TestBit(kUseProgressDialog)) {
1745  if ((fProgressDialog =
1746  gROOT->GetPluginManager()->FindHandler("TProofProgressDialog")))
1747  if (fProgressDialog->LoadPlugin() == -1)
1748  fProgressDialog = 0;
1749  }
1750 
1751  fSlaves->Add(slave);
1752  fIntHandler = new TProofInterruptHandler(this);
1753  }
1754 
1755  } else {
1756  delete slave;
1757  // Notify only if verbosity is on: most likely the failure has already been notified
1758  if (gDebug > 0)
1759  Error("StartSlaves", "failed to create (or connect to) the PROOF master server");
1760  return kFALSE;
1761  }
1762  }
1763 
1764  return kTRUE;
1765 }
1766 
1767 ////////////////////////////////////////////////////////////////////////////////
1768 /// Close all open slave servers.
1769 /// Client can decide to shutdown the remote session by passing option is 'S'
1770 /// or 's'. Default for clients is detach, if supported. Masters always
1771 /// shutdown the remote counterpart.
1772 
1773 void TProof::Close(Option_t *opt)
1774 {
1775  { std::lock_guard<std::recursive_mutex> lock(fCloseMutex);
1776 
1777  fValid = kFALSE;
1778  if (fSlaves) {
1779  if (fIntHandler)
1780  fIntHandler->Remove();
1781 
1782  TIter nxs(fSlaves);
1783  TSlave *sl = 0;
1784  while ((sl = (TSlave *)nxs()))
1785  sl->Close(opt);
1786 
1787  fActiveSlaves->Clear("nodelete");
1788  fUniqueSlaves->Clear("nodelete");
1789  fAllUniqueSlaves->Clear("nodelete");
1790  fNonUniqueMasters->Clear("nodelete");
1791  fBadSlaves->Clear("nodelete");
1792  fInactiveSlaves->Clear("nodelete");
1793  fSlaves->Delete();
1794  }
1795  }
1796 
1797  { R__LOCKGUARD(gROOTMutex);
1798  gROOT->GetListOfSockets()->Remove(this);
1799 
1800  if (fChains) {
1801  while (TChain *chain = dynamic_cast<TChain*> (fChains->First()) ) {
1802  // remove "chain" from list
1803  chain->SetProof(0);
1804  RemoveChain(chain);
1805  }
1806  }
1807 
1808  if (IsProofd()) {
1809 
1810  gROOT->GetListOfProofs()->Remove(this);
1811  if (gProof && gProof == this) {
1812  // Set previous proofd-related as default
1813  TIter pvp(gROOT->GetListOfProofs(), kIterBackward);
1814  while ((gProof = (TProof *)pvp())) {
1815  if (gProof->IsProofd())
1816  break;
1817  }
1818  }
1819  }
1820  }
1821 }
1822 
1823 ////////////////////////////////////////////////////////////////////////////////
1824 /// Create a new TSlave of type TSlave::kSlave.
1825 /// Note: creation of TSlave is private with TProof as a friend.
1826 /// Derived classes must use this function to create slaves.
1827 
1828 TSlave *TProof::CreateSlave(const char *url, const char *ord,
1829  Int_t perf, const char *image, const char *workdir)
1830 {
1831  TSlave* sl = TSlave::Create(url, ord, perf, image,
1832  this, TSlave::kSlave, workdir, 0);
1833 
1834  if (sl->IsValid()) {
1835  sl->SetInputHandler(new TProofInputHandler(this, sl->GetSocket()));
1836  // must set fParallel to 1 for slaves since they do not
1837  // report their fParallel with a LOG_DONE message
1838  sl->fParallel = 1;
1839  }
1840 
1841  return sl;
1842 }
1843 
1844 
1845 ////////////////////////////////////////////////////////////////////////////////
1846 /// Create a new TSlave of type TSlave::kMaster.
1847 /// Note: creation of TSlave is private with TProof as a friend.
1848 /// Derived classes must use this function to create slaves.
1849 
1850 TSlave *TProof::CreateSubmaster(const char *url, const char *ord,
1851  const char *image, const char *msd, Int_t nwk)
1852 {
1853  TSlave *sl = TSlave::Create(url, ord, 100, image, this,
1854  TSlave::kMaster, 0, msd, nwk);
1855 
1856  if (sl->IsValid()) {
1857  sl->SetInputHandler(new TProofInputHandler(this, sl->GetSocket()));
1858  }
1859 
1860  return sl;
1861 }
1862 
1863 ////////////////////////////////////////////////////////////////////////////////
1864 /// Find slave that has TSocket s. Returns 0 in case slave is not found.
1865 
1866 TSlave *TProof::FindSlave(TSocket *s) const
1867 {
1868  TSlave *sl;
1869  TIter next(fSlaves);
1870 
1871  while ((sl = (TSlave *)next())) {
1872  if (sl->IsValid() && sl->GetSocket() == s)
1873  return sl;
1874  }
1875  return 0;
1876 }
1877 
1878 ////////////////////////////////////////////////////////////////////////////////
1879 /// Add to the fUniqueSlave list the active slaves that have a unique
1880 /// (user) file system image. This information is used to transfer files
1881 /// only once to nodes that share a file system (an image). Submasters
1882 /// which are not in fUniqueSlaves are put in the fNonUniqueMasters
1883 /// list. That list is used to trigger the transferring of files to
1884 /// the submaster's unique slaves without the need to transfer the file
1885 /// to the submaster.
1886 
1887 void TProof::FindUniqueSlaves()
1888 {
1889  fUniqueSlaves->Clear();
1890  fUniqueMonitor->RemoveAll();
1891  fAllUniqueSlaves->Clear();
1892  fAllUniqueMonitor->RemoveAll();
1893  fNonUniqueMasters->Clear();
1894 
1895  TIter next(fActiveSlaves);
1896 
1897  while (TSlave *sl = dynamic_cast<TSlave*>(next())) {
1898  if (fImage == sl->fImage) {
1899  if (sl->GetSlaveType() == TSlave::kMaster) {
1900  fNonUniqueMasters->Add(sl);
1901  fAllUniqueSlaves->Add(sl);
1902  fAllUniqueMonitor->Add(sl->GetSocket());
1903  }
1904  continue;
1905  }
1906 
1907  TIter next2(fUniqueSlaves);
1908  TSlave *replace_slave = 0;
1909  Bool_t add = kTRUE;
1910  while (TSlave *sl2 = dynamic_cast<TSlave*>(next2())) {
1911  if (sl->fImage == sl2->fImage) {
1912  add = kFALSE;
1913  if (sl->GetSlaveType() == TSlave::kMaster) {
1914  if (sl2->GetSlaveType() == TSlave::kSlave) {
1915  // give preference to master
1916  replace_slave = sl2;
1917  add = kTRUE;
1918  } else if (sl2->GetSlaveType() == TSlave::kMaster) {
1919  fNonUniqueMasters->Add(sl);
1920  fAllUniqueSlaves->Add(sl);
1921  fAllUniqueMonitor->Add(sl->GetSocket());
1922  } else {
1923  Error("FindUniqueSlaves", "TSlave is neither Master nor Slave");
1924  R__ASSERT(0);
1925  }
1926  }
1927  break;
1928  }
1929  }
1930 
1931  if (add) {
1932  fUniqueSlaves->Add(sl);
1933  fAllUniqueSlaves->Add(sl);
1934  fUniqueMonitor->Add(sl->GetSocket());
1935  fAllUniqueMonitor->Add(sl->GetSocket());
1936  if (replace_slave) {
1937  fUniqueSlaves->Remove(replace_slave);
1938  fAllUniqueSlaves->Remove(replace_slave);
1939  fUniqueMonitor->Remove(replace_slave->GetSocket());
1940  fAllUniqueMonitor->Remove(replace_slave->GetSocket());
1941  }
1942  }
1943  }
1944 
1945  // will be actiavted in Collect()
1946  fUniqueMonitor->DeActivateAll();
1947  fAllUniqueMonitor->DeActivateAll();
1948 }
1949 
1950 ////////////////////////////////////////////////////////////////////////////////
1951 /// Return number of slaves as described in the config file.
1952 
1953 Int_t TProof::GetNumberOfSlaves() const
1954 {
1955  return fSlaves->GetSize();
1956 }
1957 
1958 ////////////////////////////////////////////////////////////////////////////////
1959 /// Return number of active slaves, i.e. slaves that are valid and in
1960 /// the current computing group.
1961 
1962 Int_t TProof::GetNumberOfActiveSlaves() const
1963 {
1964  return fActiveSlaves->GetSize();
1965 }
1966 
1967 ////////////////////////////////////////////////////////////////////////////////
1968 /// Return number of inactive slaves, i.e. slaves that are valid but not in
1969 /// the current computing group.
1970 
1971 Int_t TProof::GetNumberOfInactiveSlaves() const
1972 {
1973  return fInactiveSlaves->GetSize();
1974 }
1975 
1976 ////////////////////////////////////////////////////////////////////////////////
1977 /// Return number of unique slaves, i.e. active slaves that have each a
1978 /// unique different user files system.
1979 
1980 Int_t TProof::GetNumberOfUniqueSlaves() const
1981 {
1982  return fUniqueSlaves->GetSize();
1983 }
1984 
1985 ////////////////////////////////////////////////////////////////////////////////
1986 /// Return number of bad slaves. This are slaves that we in the config
1987 /// file, but refused to startup or that died during the PROOF session.
1988 
1989 Int_t TProof::GetNumberOfBadSlaves() const
1990 {
1991  return fBadSlaves->GetSize();
1992 }
1993 
1994 ////////////////////////////////////////////////////////////////////////////////
1995 /// Ask the for the statistics of the slaves.
1996 
1997 void TProof::AskStatistics()
1998 {
1999  if (!IsValid()) return;
2000 
2001  Broadcast(kPROOF_GETSTATS, kActive);
2002  Collect(kActive, fCollectTimeout);
2003 }
2004 
2005 ////////////////////////////////////////////////////////////////////////////////
2006 /// Get statistics about CPU time, real time and bytes read.
2007 /// If verbose, print the resuls (always available via GetCpuTime(), GetRealTime()
2008 /// and GetBytesRead()
2009 
2010 void TProof::GetStatistics(Bool_t verbose)
2011 {
2012  if (fProtocol > 27) {
2013  // This returns the correct result
2014  AskStatistics();
2015  } else {
2016  // AskStatistics is buggy: parse the output of Print()
2017  RedirectHandle_t rh;
2018  gSystem->RedirectOutput(fLogFileName, "a", &rh);
2019  Print();
2020  gSystem->RedirectOutput(0, 0, &rh);
2021  TMacro *mp = GetLastLog();
2022  if (mp) {
2023  // Look for global directories
2024  TIter nxl(mp->GetListOfLines());
2025  TObjString *os = 0;
2026  while ((os = (TObjString *) nxl())) {
2027  TString s(os->GetName());
2028  if (s.Contains("Total MB's processed:")) {
2029  s.ReplaceAll("Total MB's processed:", "");
2030  if (s.IsFloat()) fBytesRead = (Long64_t) s.Atof() * (1024*1024);
2031  } else if (s.Contains("Total real time used (s):")) {
2032  s.ReplaceAll("Total real time used (s):", "");
2033  if (s.IsFloat()) fRealTime = s.Atof();
2034  } else if (s.Contains("Total CPU time used (s):")) {
2035  s.ReplaceAll("Total CPU time used (s):", "");
2036  if (s.IsFloat()) fCpuTime = s.Atof();
2037  }
2038  }
2039  delete mp;
2040  }
2041  }
2042 
2043  if (verbose) {
2044  Printf(" Real/CPU time (s): %.3f / %.3f; workers: %d; processed: %.2f MBs",
2045  GetRealTime(), GetCpuTime(), GetParallel(), float(GetBytesRead())/(1024*1024));
2046  }
2047 }
2048 
2049 ////////////////////////////////////////////////////////////////////////////////
2050 /// Ask the for the number of parallel slaves.
2051 
2052 void TProof::AskParallel()
2053 {
2054  if (!IsValid()) return;
2055 
2056  Broadcast(kPROOF_GETPARALLEL, kActive);
2057  Collect(kActive, fCollectTimeout);
2058 }
2059 
2060 ////////////////////////////////////////////////////////////////////////////////
2061 /// Ask the master for the list of queries.
2062 
2063 TList *TProof::GetListOfQueries(Option_t *opt)
2064 {
2065  if (!IsValid() || TestBit(TProof::kIsMaster)) return (TList *)0;
2066 
2067  Bool_t all = ((strchr(opt,'A') || strchr(opt,'a'))) ? kTRUE : kFALSE;
2068  TMessage m(kPROOF_QUERYLIST);
2069  m << all;
2070  Broadcast(m, kActive);
2071  Collect(kActive, fCollectTimeout);
2072 
2073  // This should have been filled by now
2074  return fQueries;
2075 }
2076 
2077 ////////////////////////////////////////////////////////////////////////////////
2078 /// Number of queries processed by this session
2079 
2080 Int_t TProof::GetNumberOfQueries()
2081 {
2082  if (fQueries)
2083  return fQueries->GetSize() - fOtherQueries;
2084  return 0;
2085 }
2086 
2087 ////////////////////////////////////////////////////////////////////////////////
2088 /// Set max number of draw queries whose results are saved
2089 
2090 void TProof::SetMaxDrawQueries(Int_t max)
2091 {
2092  if (max > 0) {
2093  if (fPlayer)
2094  fPlayer->SetMaxDrawQueries(max);
2095  fMaxDrawQueries = max;
2096  }
2097 }
2098 
2099 ////////////////////////////////////////////////////////////////////////////////
2100 /// Get max number of queries whose full results are kept in the
2101 /// remote sandbox
2102 
2103 void TProof::GetMaxQueries()
2104 {
2105  TMessage m(kPROOF_MAXQUERIES);
2106  m << kFALSE;
2107  Broadcast(m, kActive);
2108  Collect(kActive, fCollectTimeout);
2109 }
2110 
2111 ////////////////////////////////////////////////////////////////////////////////
2112 /// Return pointer to the list of query results in the player
2113 
2114 TList *TProof::GetQueryResults()
2115 {
2116  return (fPlayer ? fPlayer->GetListOfResults() : (TList *)0);
2117 }
2118 
2119 ////////////////////////////////////////////////////////////////////////////////
2120 /// Return pointer to the full TQueryResult instance owned by the player
2121 /// and referenced by 'ref'. If ref = 0 or "", return the last query result.
2122 
2123 TQueryResult *TProof::GetQueryResult(const char *ref)
2124 {
2125  return (fPlayer ? fPlayer->GetQueryResult(ref) : (TQueryResult *)0);
2126 }
2127 
2128 ////////////////////////////////////////////////////////////////////////////////
2129 /// Ask the master for the list of queries.
2130 /// Options:
2131 /// "A" show information about all the queries known to the
2132 /// server, i.e. even those processed by other sessions
2133 /// "L" show only information about queries locally available
2134 /// i.e. already retrieved. If "L" is specified, "A" is
2135 /// ignored.
2136 /// "F" show all details available about queries
2137 /// "H" print help menu
2138 /// Default ""
2139 
2140 void TProof::ShowQueries(Option_t *opt)
2141 {
2142  Bool_t help = ((strchr(opt,'H') || strchr(opt,'h'))) ? kTRUE : kFALSE;
2143  if (help) {
2144 
2145  // Help
2146 
2147  Printf("+++");
2148  Printf("+++ Options: \"A\" show all queries known to server");
2149  Printf("+++ \"L\" show retrieved queries");
2150  Printf("+++ \"F\" full listing of query info");
2151  Printf("+++ \"H\" print this menu");
2152  Printf("+++");
2153  Printf("+++ (case insensitive)");
2154  Printf("+++");
2155  Printf("+++ Use Retrieve(<#>) to retrieve the full"
2156  " query results from the master");
2157  Printf("+++ e.g. Retrieve(8)");
2158 
2159  Printf("+++");
2160 
2161  return;
2162  }
2163 
2164  if (!IsValid()) return;
2165 
2166  Bool_t local = ((strchr(opt,'L') || strchr(opt,'l'))) ? kTRUE : kFALSE;
2167 
2168  TObject *pq = 0;
2169  if (!local) {
2170  GetListOfQueries(opt);
2171 
2172  if (!fQueries) return;
2173 
2174  TIter nxq(fQueries);
2175 
2176  // Queries processed by other sessions
2177  if (fOtherQueries > 0) {
2178  Printf("+++");
2179  Printf("+++ Queries processed during other sessions: %d", fOtherQueries);
2180  Int_t nq = 0;
2181  while (nq++ < fOtherQueries && (pq = nxq()))
2182  pq->Print(opt);
2183  }
2184 
2185  // Queries processed by this session
2186  Printf("+++");
2187  Printf("+++ Queries processed during this session: selector: %d, draw: %d",
2188  GetNumberOfQueries(), fDrawQueries);
2189  while ((pq = nxq()))
2190  pq->Print(opt);
2191 
2192  } else {
2193 
2194  // Queries processed by this session
2195  Printf("+++");
2196  Printf("+++ Queries processed during this session: selector: %d, draw: %d",
2197  GetNumberOfQueries(), fDrawQueries);
2198 
2199  // Queries available locally
2200  TList *listlocal = fPlayer ? fPlayer->GetListOfResults() : (TList *)0;
2201  if (listlocal) {
2202  Printf("+++");
2203  Printf("+++ Queries available locally: %d", listlocal->GetSize());
2204  TIter nxlq(listlocal);
2205  while ((pq = nxlq()))
2206  pq->Print(opt);
2207  }
2208  }
2209  Printf("+++");
2210 }
2211 
2212 ////////////////////////////////////////////////////////////////////////////////
2213 /// See if the data is ready to be analyzed.
2214 
2215 Bool_t TProof::IsDataReady(Long64_t &totalbytes, Long64_t &bytesready)
2216 {
2217  if (!IsValid()) return kFALSE;
2218 
2219  TList submasters;
2220  TIter nextSlave(GetListOfActiveSlaves());
2221  while (TSlave *sl = dynamic_cast<TSlave*>(nextSlave())) {
2222  if (sl->GetSlaveType() == TSlave::kMaster) {
2223  submasters.Add(sl);
2224  }
2225  }
2226 
2227  fDataReady = kTRUE; //see if any submasters set it to false
2228  fBytesReady = 0;
2229  fTotalBytes = 0;
2230  //loop over submasters and see if data is ready
2231  if (submasters.GetSize() > 0) {
2232  Broadcast(kPROOF_DATA_READY, &submasters);
2233  Collect(&submasters);
2234  }
2235 
2236  bytesready = fBytesReady;
2237  totalbytes = fTotalBytes;
2238 
2239  EmitVA("IsDataReady(Long64_t,Long64_t)", 2, totalbytes, bytesready);
2240 
2241  PDB(kGlobal,2)
2242  Info("IsDataReady", "%lld / %lld (%s)",
2243  bytesready, totalbytes, fDataReady?"READY":"NOT READY");
2244 
2245  return fDataReady;
2246 }
2247 
2248 ////////////////////////////////////////////////////////////////////////////////
2249 /// Send interrupt to master or slave servers.
2250 
2251 void TProof::Interrupt(EUrgent type, ESlaves list)
2252 {
2253  if (!IsValid()) return;
2254 
2255  TList *slaves = 0;
2256  if (list == kAll) slaves = fSlaves;
2257  if (list == kActive) slaves = fActiveSlaves;
2258  if (list == kUnique) slaves = fUniqueSlaves;
2259  if (list == kAllUnique) slaves = fAllUniqueSlaves;
2260 
2261  if (slaves->GetSize() == 0) return;
2262 
2263  TSlave *sl;
2264  TIter next(slaves);
2265 
2266  while ((sl = (TSlave *)next())) {
2267  if (sl->IsValid()) {
2268 
2269  // Ask slave to progate the interrupt request
2270  sl->Interrupt((Int_t)type);
2271  }
2272  }
2273 }
2274 
2275 ////////////////////////////////////////////////////////////////////////////////
2276 /// Returns number of slaves active in parallel mode. Returns 0 in case
2277 /// there are no active slaves. Returns -1 in case of error.
2278 
2279 Int_t TProof::GetParallel() const
2280 {
2281  if (!IsValid()) return -1;
2282 
2283  // iterate over active slaves and return total number of slaves
2284  TIter nextSlave(GetListOfActiveSlaves());
2285  Int_t nparallel = 0;
2286  while (TSlave* sl = dynamic_cast<TSlave*>(nextSlave()))
2287  if (sl->GetParallel() >= 0)
2288  nparallel += sl->GetParallel();
2289 
2290  return nparallel;
2291 }
2292 
2293 ////////////////////////////////////////////////////////////////////////////////
2294 /// Returns list of TSlaveInfo's. In case of error return 0.
2295 
2296 TList *TProof::GetListOfSlaveInfos()
2297 {
2298  if (!IsValid()) return 0;
2299 
2300  if (fSlaveInfo == 0) {
2301  fSlaveInfo = new TSortedList(kSortDescending);
2302  fSlaveInfo->SetOwner();
2303  } else {
2304  fSlaveInfo->Delete();
2305  }
2306 
2307  TList masters;
2308  TIter next(GetListOfSlaves());
2309  TSlave *slave;
2310 
2311  while ((slave = (TSlave *) next()) != 0) {
2312  if (slave->GetSlaveType() == TSlave::kSlave) {
2313  const char *name = IsLite() ? gSystem->HostName() : slave->GetName();
2314  TSlaveInfo *slaveinfo = new TSlaveInfo(slave->GetOrdinal(),
2315  name,
2316  slave->GetPerfIdx());
2317  fSlaveInfo->Add(slaveinfo);
2318 
2319  TIter nextactive(GetListOfActiveSlaves());
2320  TSlave *activeslave;
2321  while ((activeslave = (TSlave *) nextactive())) {
2322  if (TString(slaveinfo->GetOrdinal()) == activeslave->GetOrdinal()) {
2323  slaveinfo->SetStatus(TSlaveInfo::kActive);
2324  break;
2325  }
2326  }
2327 
2328  TIter nextbad(GetListOfBadSlaves());
2329  TSlave *badslave;
2330  while ((badslave = (TSlave *) nextbad())) {
2331  if (TString(slaveinfo->GetOrdinal()) == badslave->GetOrdinal()) {
2332  slaveinfo->SetStatus(TSlaveInfo::kBad);
2333  break;
2334  }
2335  }
2336  // Get system info if supported
2337  if (slave->IsValid()) {
2338  if (slave->GetSocket()->Send(kPROOF_GETSLAVEINFO) == -1)
2339  MarkBad(slave, "could not send kPROOF_GETSLAVEINFO message");
2340  else
2341  masters.Add(slave);
2342  }
2343 
2344  } else if (slave->GetSlaveType() == TSlave::kMaster) {
2345  if (slave->IsValid()) {
2346  if (slave->GetSocket()->Send(kPROOF_GETSLAVEINFO) == -1)
2347  MarkBad(slave, "could not send kPROOF_GETSLAVEINFO message");
2348  else
2349  masters.Add(slave);
2350  }
2351  } else {
2352  Error("GetSlaveInfo", "TSlave is neither Master nor Slave");
2353  R__ASSERT(0);
2354  }
2355  }
2356  if (masters.GetSize() > 0) Collect(&masters);
2357 
2358  return fSlaveInfo;
2359 }
2360 
2361 ////////////////////////////////////////////////////////////////////////////////
2362 /// Activate slave server list.
2363 
2364 void TProof::Activate(TList *slaves)
2365 {
2366  TMonitor *mon = fAllMonitor;
2367  mon->DeActivateAll();
2368 
2369  slaves = !slaves ? fActiveSlaves : slaves;
2370 
2371  TIter next(slaves);
2372  TSlave *sl;
2373  while ((sl = (TSlave*) next())) {
2374  if (sl->IsValid())
2375  mon->Activate(sl->GetSocket());
2376  }
2377 }
2378 
2379 ////////////////////////////////////////////////////////////////////////////////
2380 /// Activate (on == TRUE) or deactivate (on == FALSE) all sockets
2381 /// monitored by 'mon'.
2382 
2383 void TProof::SetMonitor(TMonitor *mon, Bool_t on)
2384 {
2385  TMonitor *m = (mon) ? mon : fCurrentMonitor;
2386  if (m) {
2387  if (on)
2388  m->ActivateAll();
2389  else
2390  m->DeActivateAll();
2391  }
2392 }
2393 
2394 ////////////////////////////////////////////////////////////////////////////////
2395 /// Broadcast the group priority to all workers in the specified list. Returns
2396 /// the number of workers the message was successfully sent to.
2397 /// Returns -1 in case of error.
2398 
2399 Int_t TProof::BroadcastGroupPriority(const char *grp, Int_t priority, TList *workers)
2400 {
2401  if (!IsValid()) return -1;
2402 
2403  if (workers->GetSize() == 0) return 0;
2404 
2405  int nsent = 0;
2406  TIter next(workers);
2407 
2408  TSlave *wrk;
2409  while ((wrk = (TSlave *)next())) {
2410  if (wrk->IsValid()) {
2411  if (wrk->SendGroupPriority(grp, priority) == -1)
2412  MarkBad(wrk, "could not send group priority");
2413  else
2414  nsent++;
2415  }
2416  }
2417 
2418  return nsent;
2419 }
2420 
2421 ////////////////////////////////////////////////////////////////////////////////
2422 /// Broadcast the group priority to all workers in the specified list. Returns
2423 /// the number of workers the message was successfully sent to.
2424 /// Returns -1 in case of error.
2425 
2426 Int_t TProof::BroadcastGroupPriority(const char *grp, Int_t priority, ESlaves list)
2427 {
2428  TList *workers = 0;
2429  if (list == kAll) workers = fSlaves;
2430  if (list == kActive) workers = fActiveSlaves;
2431  if (list == kUnique) workers = fUniqueSlaves;
2432  if (list == kAllUnique) workers = fAllUniqueSlaves;
2433 
2434  return BroadcastGroupPriority(grp, priority, workers);
2435 }
2436 
2437 ////////////////////////////////////////////////////////////////////////////////
2438 /// Reset the merge progress notificator
2439 
2440 void TProof::ResetMergePrg()
2441 {
2442  fMergePrg.Reset(fActiveSlaves->GetSize());
2443 }
2444 
2445 ////////////////////////////////////////////////////////////////////////////////
2446 /// Broadcast a message to all slaves in the specified list. Returns
2447 /// the number of slaves the message was successfully sent to.
2448 /// Returns -1 in case of error.
2449 
2450 Int_t TProof::Broadcast(const TMessage &mess, TList *slaves)
2451 {
2452  if (!IsValid()) return -1;
2453 
2454  if (!slaves || slaves->GetSize() == 0) return 0;
2455 
2456  int nsent = 0;
2457  TIter next(slaves);
2458 
2459  TSlave *sl;
2460  while ((sl = (TSlave *)next())) {
2461  if (sl->IsValid()) {
2462  if (sl->GetSocket()->Send(mess) == -1)
2463  MarkBad(sl, "could not broadcast request");
2464  else
2465  nsent++;
2466  }
2467  }
2468 
2469  return nsent;
2470 }
2471 
2472 ////////////////////////////////////////////////////////////////////////////////
2473 /// Broadcast a message to all slaves in the specified list (either
2474 /// all slaves or only the active slaves). Returns the number of slaves
2475 /// the message was successfully sent to. Returns -1 in case of error.
2476 
2477 Int_t TProof::Broadcast(const TMessage &mess, ESlaves list)
2478 {
2479  TList *slaves = 0;
2480  if (list == kAll) slaves = fSlaves;
2481  if (list == kActive) slaves = fActiveSlaves;
2482  if (list == kUnique) slaves = fUniqueSlaves;
2483  if (list == kAllUnique) slaves = fAllUniqueSlaves;
2484 
2485  return Broadcast(mess, slaves);
2486 }
2487 
2488 ////////////////////////////////////////////////////////////////////////////////
2489 /// Broadcast a character string buffer to all slaves in the specified
2490 /// list. Use kind to set the TMessage what field. Returns the number of
2491 /// slaves the message was sent to. Returns -1 in case of error.
2492 
2493 Int_t TProof::Broadcast(const char *str, Int_t kind, TList *slaves)
2494 {
2495  TMessage mess(kind);
2496  if (str) mess.WriteString(str);
2497  return Broadcast(mess, slaves);
2498 }
2499 
2500 ////////////////////////////////////////////////////////////////////////////////
2501 /// Broadcast a character string buffer to all slaves in the specified
2502 /// list (either all slaves or only the active slaves). Use kind to
2503 /// set the TMessage what field. Returns the number of slaves the message
2504 /// was sent to. Returns -1 in case of error.
2505 
2506 Int_t TProof::Broadcast(const char *str, Int_t kind, ESlaves list)
2507 {
2508  TMessage mess(kind);
2509  if (str) mess.WriteString(str);
2510  return Broadcast(mess, list);
2511 }
2512 
2513 ////////////////////////////////////////////////////////////////////////////////
2514 /// Broadcast an object to all slaves in the specified list. Use kind to
2515 /// set the TMEssage what field. Returns the number of slaves the message
2516 /// was sent to. Returns -1 in case of error.
2517 
2518 Int_t TProof::BroadcastObject(const TObject *obj, Int_t kind, TList *slaves)
2519 {
2520  TMessage mess(kind);
2521  mess.WriteObject(obj);
2522  return Broadcast(mess, slaves);
2523 }
2524 
2525 ////////////////////////////////////////////////////////////////////////////////
2526 /// Broadcast an object to all slaves in the specified list. Use kind to
2527 /// set the TMEssage what field. Returns the number of slaves the message
2528 /// was sent to. Returns -1 in case of error.
2529 
2530 Int_t TProof::BroadcastObject(const TObject *obj, Int_t kind, ESlaves list)
2531 {
2532  TMessage mess(kind);
2533  mess.WriteObject(obj);
2534  return Broadcast(mess, list);
2535 }
2536 
2537 ////////////////////////////////////////////////////////////////////////////////
2538 /// Broadcast a raw buffer of specified length to all slaves in the
2539 /// specified list. Returns the number of slaves the buffer was sent to.
2540 /// Returns -1 in case of error.
2541 
2542 Int_t TProof::BroadcastRaw(const void *buffer, Int_t length, TList *slaves)
2543 {
2544  if (!IsValid()) return -1;
2545 
2546  if (slaves->GetSize() == 0) return 0;
2547 
2548  int nsent = 0;
2549  TIter next(slaves);
2550 
2551  TSlave *sl;
2552  while ((sl = (TSlave *)next())) {
2553  if (sl->IsValid()) {
2554  if (sl->GetSocket()->SendRaw(buffer, length) == -1)
2555  MarkBad(sl, "could not send broadcast-raw request");
2556  else
2557  nsent++;
2558  }
2559  }
2560 
2561  return nsent;
2562 }
2563 
2564 ////////////////////////////////////////////////////////////////////////////////
2565 /// Broadcast a raw buffer of specified length to all slaves in the
2566 /// specified list. Returns the number of slaves the buffer was sent to.
2567 /// Returns -1 in case of error.
2568 
2569 Int_t TProof::BroadcastRaw(const void *buffer, Int_t length, ESlaves list)
2570 {
2571  TList *slaves = 0;
2572  if (list == kAll) slaves = fSlaves;
2573  if (list == kActive) slaves = fActiveSlaves;
2574  if (list == kUnique) slaves = fUniqueSlaves;
2575  if (list == kAllUnique) slaves = fAllUniqueSlaves;
2576 
2577  return BroadcastRaw(buffer, length, slaves);
2578 }
2579 
2580 ////////////////////////////////////////////////////////////////////////////////
2581 /// Broadcast file to all workers in the specified list. Returns the number of workers
2582 /// the buffer was sent to.
2583 /// Returns -1 in case of error.
2584 
2585 Int_t TProof::BroadcastFile(const char *file, Int_t opt, const char *rfile, TList *wrks)
2586 {
2587  if (!IsValid()) return -1;
2588 
2589  if (wrks->GetSize() == 0) return 0;
2590 
2591  int nsent = 0;
2592  TIter next(wrks);
2593 
2594  TSlave *wrk;
2595  while ((wrk = (TSlave *)next())) {
2596  if (wrk->IsValid()) {
2597  if (SendFile(file, opt, rfile, wrk) < 0)
2598  Error("BroadcastFile",
2599  "problems sending file to worker %s (%s)",
2600  wrk->GetOrdinal(), wrk->GetName());
2601  else
2602  nsent++;
2603  }
2604  }
2605 
2606  return nsent;
2607 }
2608 
2609 ////////////////////////////////////////////////////////////////////////////////
2610 /// Broadcast file to all workers in the specified list. Returns the number of workers
2611 /// the buffer was sent to.
2612 /// Returns -1 in case of error.
2613 
2614 Int_t TProof::BroadcastFile(const char *file, Int_t opt, const char *rfile, ESlaves list)
2615 {
2616  TList *wrks = 0;
2617  if (list == kAll) wrks = fSlaves;
2618  if (list == kActive) wrks = fActiveSlaves;
2619  if (list == kUnique) wrks = fUniqueSlaves;
2620  if (list == kAllUnique) wrks = fAllUniqueSlaves;
2621 
2622  return BroadcastFile(file, opt, rfile, wrks);
2623 }
2624 
2625 ////////////////////////////////////////////////////////////////////////////////
2626 /// Release the used monitor to be used, making sure to delete newly created
2627 /// monitors.
2628 
2629 void TProof::ReleaseMonitor(TMonitor *mon)
2630 {
2631  if (mon && (mon != fAllMonitor) && (mon != fActiveMonitor)
2632  && (mon != fUniqueMonitor) && (mon != fAllUniqueMonitor)) {
2633  delete mon;
2634  }
2635 }
2636 
2637 ////////////////////////////////////////////////////////////////////////////////
2638 /// Collect responses from slave sl. Returns the number of slaves that
2639 /// responded (=1).
2640 /// If timeout >= 0, wait at most timeout seconds (timeout = -1 by default,
2641 /// which means wait forever).
2642 /// If defined (>= 0) endtype is the message that stops this collection.
2643 
2644 Int_t TProof::Collect(const TSlave *sl, Long_t timeout, Int_t endtype, Bool_t deactonfail)
2645 {
2646  Int_t rc = 0;
2647 
2648  TMonitor *mon = 0;
2649  if (!sl->IsValid()) return 0;
2650 
2651  if (fCurrentMonitor == fAllMonitor) {
2652  mon = new TMonitor;
2653  } else {
2654  mon = fAllMonitor;
2655  mon->DeActivateAll();
2656  }
2657  mon->Activate(sl->GetSocket());
2658 
2659  rc = Collect(mon, timeout, endtype, deactonfail);
2660  ReleaseMonitor(mon);
2661  return rc;
2662 }
2663 
2664 ////////////////////////////////////////////////////////////////////////////////
2665 /// Collect responses from the slave servers. Returns the number of slaves
2666 /// that responded.
2667 /// If timeout >= 0, wait at most timeout seconds (timeout = -1 by default,
2668 /// which means wait forever).
2669 /// If defined (>= 0) endtype is the message that stops this collection.
2670 
2671 Int_t TProof::Collect(TList *slaves, Long_t timeout, Int_t endtype, Bool_t deactonfail)
2672 {
2673  Int_t rc = 0;
2674 
2675  TMonitor *mon = 0;
2676 
2677  if (fCurrentMonitor == fAllMonitor) {
2678  mon = new TMonitor;
2679  } else {
2680  mon = fAllMonitor;
2681  mon->DeActivateAll();
2682  }
2683  TIter next(slaves);
2684  TSlave *sl;
2685  while ((sl = (TSlave*) next())) {
2686  if (sl->IsValid())
2687  mon->Activate(sl->GetSocket());
2688  }
2689 
2690  rc = Collect(mon, timeout, endtype, deactonfail);
2691  ReleaseMonitor(mon);
2692  return rc;
2693 }
2694 
2695 ////////////////////////////////////////////////////////////////////////////////
2696 /// Collect responses from the slave servers. Returns the number of slaves
2697 /// that responded.
2698 /// If timeout >= 0, wait at most timeout seconds (timeout = -1 by default,
2699 /// which means wait forever).
2700 /// If defined (>= 0) endtype is the message that stops this collection.
2701 
2702 Int_t TProof::Collect(ESlaves list, Long_t timeout, Int_t endtype, Bool_t deactonfail)
2703 {
2704  Int_t rc = 0;
2705  TMonitor *mon = 0;
2706 
2707  if (list == kAll) mon = fAllMonitor;
2708  if (list == kActive) mon = fActiveMonitor;
2709  if (list == kUnique) mon = fUniqueMonitor;
2710  if (list == kAllUnique) mon = fAllUniqueMonitor;
2711  if (fCurrentMonitor == mon) {
2712  // Get a copy
2713  mon = new TMonitor(*mon);
2714  }
2715  mon->ActivateAll();
2716 
2717  rc = Collect(mon, timeout, endtype, deactonfail);
2718  ReleaseMonitor(mon);
2719  return rc;
2720 }
2721 
2722 ////////////////////////////////////////////////////////////////////////////////
2723 /// Collect responses from the slave servers. Returns the number of messages
2724 /// received. Can be 0 if there are no active slaves.
2725 /// If timeout >= 0, wait at most timeout seconds (timeout = -1 by default,
2726 /// which means wait forever).
2727 /// If defined (>= 0) endtype is the message that stops this collection.
2728 /// Collect also stops its execution from time to time to check for new
2729 /// workers in Dynamic Startup mode.
2730 
2731 Int_t TProof::Collect(TMonitor *mon, Long_t timeout, Int_t endtype, Bool_t deactonfail)
2732 {
2733  Int_t collectId = gRandom->Integer(9999);
2734 
2735  PDB(kCollect, 3)
2736  Info("Collect", ">>>>>> Entering collect responses #%04d", collectId);
2737 
2738  // Reset the status flag and clear the messages in the list, if any
2739  fStatus = 0;
2740  fRecvMessages->Clear();
2741 
2742  Long_t actto = (Long_t)(gEnv->GetValue("Proof.SocketActivityTimeout", -1) * 1000);
2743 
2744  if (!mon->GetActive(actto)) return 0;
2745 
2746  DeActivateAsyncInput();
2747 
2748  // Used by external code to know what we are monitoring
2749  TMonitor *savedMonitor = 0;
2750  if (fCurrentMonitor) {
2751  savedMonitor = fCurrentMonitor;
2752  fCurrentMonitor = mon;
2753  } else {
2754  fCurrentMonitor = mon;
2755  fBytesRead = 0;
2756  fRealTime = 0.0;
2757  fCpuTime = 0.0;
2758  }
2759 
2760  // We want messages on the main window during synchronous collection,
2761  // but we save the present status to restore it at the end
2762  Bool_t saveRedirLog = fRedirLog;
2763  if (!IsIdle() && !IsSync())
2764  fRedirLog = kFALSE;
2765 
2766  int cnt = 0, rc = 0;
2767 
2768  // Timeout counter
2769  Long_t nto = timeout;
2770  PDB(kCollect, 2)
2771  Info("Collect","#%04d: active: %d", collectId, mon->GetActive());
2772 
2773  // On clients, handle Ctrl-C during collection
2774  if (fIntHandler)
2775  fIntHandler->Add();
2776 
2777  // Sockets w/o activity during the last 'sto' millisecs are deactivated
2778  Int_t nact = 0;
2779  Long_t sto = -1;
2780  Int_t nsto = 60;
2781  Int_t pollint = gEnv->GetValue("Proof.DynamicStartupPollInt", (Int_t) kPROOF_DynWrkPollInt_s);
2782  mon->ResetInterrupt();
2783  while ((nact = mon->GetActive(sto)) && (nto < 0 || nto > 0)) {
2784 
2785  // Dump last waiting sockets, if in debug mode
2786  PDB(kCollect, 2) {
2787  if (nact < 4) {
2788  TList *al = mon->GetListOfActives();
2789  if (al && al->GetSize() > 0) {
2790  Info("Collect"," %d node(s) still active:", al->GetSize());
2791  TIter nxs(al);
2792  TSocket *xs = 0;
2793  while ((xs = (TSocket *)nxs())) {
2794  TSlave *wrk = FindSlave(xs);
2795  if (wrk)
2796  Info("Collect"," %s (%s)", wrk->GetName(), wrk->GetOrdinal());
2797  else
2798  Info("Collect"," %p: %s:%d", xs, xs->GetInetAddress().GetHostName(),
2799  xs->GetInetAddress().GetPort());
2800  }
2801  }
2802  }
2803  }
2804 
2805  // Preemptive poll for new workers on the master only in Dynamic Mode and only
2806  // during processing (TODO: should work on Top Master only)
2807  if (TestBit(TProof::kIsMaster) && !IsIdle() && fDynamicStartup && !fIsPollingWorkers &&
2808  ((fLastPollWorkers_s == -1) || (time(0)-fLastPollWorkers_s >= pollint))) {
2809  fIsPollingWorkers = kTRUE;
2810  if (PollForNewWorkers() > 0) DeActivateAsyncInput();
2811  fLastPollWorkers_s = time(0);
2812  fIsPollingWorkers = kFALSE;
2813  PDB(kCollect, 1)
2814  Info("Collect","#%04d: now active: %d", collectId, mon->GetActive());
2815  }
2816 
2817  // Wait for a ready socket
2818  PDB(kCollect, 3)
2819  Info("Collect", "Will invoke Select() #%04d", collectId);
2820  TSocket *s = mon->Select(1000);
2821 
2822  if (s && s != (TSocket *)(-1)) {
2823  // Get and analyse the info it did receive
2824  rc = CollectInputFrom(s, endtype, deactonfail);
2825  if (rc == 1 || (rc == 2 && !savedMonitor)) {
2826  // Deactivate it if we are done with it
2827  mon->DeActivate(s);
2828  PDB(kCollect, 2)
2829  Info("Collect","#%04d: deactivating %p (active: %d, %p)", collectId,
2830  s, mon->GetActive(),
2831  mon->GetListOfActives()->First());
2832  } else if (rc == 2) {
2833  // This end message was for the saved monitor
2834  // Deactivate it if we are done with it
2835  if (savedMonitor) {
2836  savedMonitor->DeActivate(s);
2837  PDB(kCollect, 2)
2838  Info("Collect","save monitor: deactivating %p (active: %d, %p)",
2839  s, savedMonitor->GetActive(),
2840  savedMonitor->GetListOfActives()->First());
2841  }
2842  }
2843 
2844  // Update counter (if no error occured)
2845  if (rc >= 0)
2846  cnt++;
2847  } else {
2848  // If not timed-out, exit if not stopped or not aborted
2849  // (player exits status is finished in such a case); otherwise,
2850  // we still need to collect the partial output info
2851  if (!s)
2852  if (fPlayer && (fPlayer->GetExitStatus() == TVirtualProofPlayer::kFinished))
2853  mon->DeActivateAll();
2854  // Decrease the timeout counter if requested
2855  if (s == (TSocket *)(-1) && nto > 0)
2856  nto--;
2857  }
2858 
2859  // Check if there are workers with ready output to be sent and ask the first to send it
2860  if (IsMaster() && fWrksOutputReady && fWrksOutputReady->GetSize() > 0) {
2861  // Maximum number of concurrent sendings
2862  Int_t mxws = gEnv->GetValue("Proof.ControlSendOutput", 1);
2863  if (TProof::GetParameter(fPlayer->GetInputList(), "PROOF_ControlSendOutput", mxws) != 0)
2864  mxws = gEnv->GetValue("Proof.ControlSendOutput", 1);
2865  TIter nxwr(fWrksOutputReady);
2866  TSlave *wrk = 0;
2867  while (mxws && (wrk = (TSlave *) nxwr())) {
2868  if (!wrk->TestBit(TSlave::kOutputRequested)) {
2869  // Ask worker for output
2870  TMessage sendoutput(kPROOF_SENDOUTPUT);
2871  PDB(kCollect, 2)
2872  Info("Collect", "worker %s was asked to send its output to master",
2873  wrk->GetOrdinal());
2874  if (wrk->GetSocket()->Send(sendoutput) != 1) {
2875  wrk->SetBit(TSlave::kOutputRequested);
2876  mxws--;
2877  }
2878  } else {
2879  // Count
2880  mxws--;
2881  }
2882  }
2883  }
2884 
2885  // Check if we need to check the socket activity (we do it every 10 cycles ~ 10 sec)
2886  sto = -1;
2887  if (--nsto <= 0) {
2888  sto = (Long_t) actto;
2889  nsto = 60;
2890  }
2891 
2892  } // end loop over active monitors
2893 
2894  // If timed-out, deactivate the remaining sockets
2895  if (nto == 0) {
2896  TList *al = mon->GetListOfActives();
2897  if (al && al->GetSize() > 0) {
2898  // Notify the name of those which did timeout
2899  Info("Collect"," %d node(s) went in timeout:", al->GetSize());
2900  TIter nxs(al);
2901  TSocket *xs = 0;
2902  while ((xs = (TSocket *)nxs())) {
2903  TSlave *wrk = FindSlave(xs);
2904  if (wrk)
2905  Info("Collect"," %s", wrk->GetName());
2906  else
2907  Info("Collect"," %p: %s:%d", xs, xs->GetInetAddress().GetHostName(),
2908  xs->GetInetAddress().GetPort());
2909  }
2910  }
2911  mon->DeActivateAll();
2912  }
2913 
2914  // Deactivate Ctrl-C special handler
2915  if (fIntHandler)
2916  fIntHandler->Remove();
2917 
2918  // make sure group view is up to date
2919  SendGroupView();
2920 
2921  // Restore redirection setting
2922  fRedirLog = saveRedirLog;
2923 
2924  // Restore the monitor
2925  fCurrentMonitor = savedMonitor;
2926 
2927  ActivateAsyncInput();
2928 
2929  PDB(kCollect, 3)
2930  Info("Collect", "<<<<<< Exiting collect responses #%04d", collectId);
2931 
2932  return cnt;
2933 }
2934 
2935 ////////////////////////////////////////////////////////////////////////////////
2936 /// Asks the PROOF Serv for new workers in Dynamic Startup mode and activates
2937 /// them. Returns the number of new workers found, or <0 on errors.
2938 
2939 Int_t TProof::PollForNewWorkers()
2940 {
2941  // Requests for worker updates
2942  Int_t dummy = 0;
2943  TList *reqWorkers = new TList();
2944  reqWorkers->SetOwner(kFALSE);
2945 
2946  if (!TestBit(TProof::kIsMaster)) {
2947  Error("PollForNewWorkers", "Can't invoke: not on a master -- should not happen!");
2948  return -1;
2949  }
2950  if (!gProofServ) {
2951  Error("PollForNewWorkers", "No ProofServ available -- should not happen!");
2952  return -1;
2953  }
2954 
2955  gProofServ->GetWorkers(reqWorkers, dummy, kTRUE); // last 2 are dummy
2956 
2957  // List of new workers only (TProofNodeInfo)
2958  TList *newWorkers = new TList();
2959  newWorkers->SetOwner(kTRUE);
2960 
2961  TIter next(reqWorkers);
2962  TProofNodeInfo *ni;
2963  TString fullOrd;
2964  while (( ni = dynamic_cast<TProofNodeInfo *>(next()) )) {
2965 
2966  // Form the full ordinal
2967  fullOrd.Form("%s.%s", gProofServ->GetOrdinal(), ni->GetOrdinal().Data());
2968 
2969  TIter nextInner(fSlaves);
2970  TSlave *sl;
2971  Bool_t found = kFALSE;
2972  while (( sl = dynamic_cast<TSlave *>(nextInner()) )) {
2973  if ( strcmp(sl->GetOrdinal(), fullOrd.Data()) == 0 ) {
2974  found = kTRUE;
2975  break;
2976  }
2977  }
2978 
2979  if (found) delete ni;
2980  else {
2981  newWorkers->Add(ni);
2982  PDB(kGlobal, 1)
2983  Info("PollForNewWorkers", "New worker found: %s:%s",
2984  ni->GetNodeName().Data(), fullOrd.Data());
2985  }
2986  }
2987 
2988  delete reqWorkers; // not owner
2989 
2990  Int_t nNewWorkers = newWorkers->GetEntries();
2991 
2992  // Add the new workers
2993  if (nNewWorkers > 0) {
2994  PDB(kGlobal, 1)
2995  Info("PollForNewWorkers", "Requesting to add %d new worker(s)", newWorkers->GetEntries());
2996  Int_t rv = AddWorkers(newWorkers);
2997  if (rv < 0) {
2998  Error("PollForNewWorkers", "Call to AddWorkers() failed (got %d < 0)", rv);
2999  return -1;
3000  }
3001  // Don't delete newWorkers: AddWorkers() will do that
3002  }
3003  else {
3004  PDB(kGlobal, 2)
3005  Info("PollForNewWorkers", "No new worker found");
3006  delete newWorkers;
3007  }
3008 
3009  return nNewWorkers;
3010 }
3011 
3012 ////////////////////////////////////////////////////////////////////////////////
3013 /// Remove links to objects in list 'ol' from gDirectory
3014 
3015 void TProof::CleanGDirectory(TList *ol)
3016 {
3017  if (ol) {
3018  TIter nxo(ol);
3019  TObject *o = 0;
3020  while ((o = nxo()))
3021  gDirectory->RecursiveRemove(o);
3022  }
3023 }
3024 
3025 ////////////////////////////////////////////////////////////////////////////////
3026 /// Collect and analyze available input from socket s.
3027 /// Returns 0 on success, -1 if any failure occurs.
3028 
3029 Int_t TProof::CollectInputFrom(TSocket *s, Int_t endtype, Bool_t deactonfail)
3030 {
3031  TMessage *mess;
3032 
3033  Int_t recvrc = 0;
3034  if ((recvrc = s->Recv(mess)) < 0) {
3035  PDB(kCollect,2)
3036  Info("CollectInputFrom","%p: got %d from Recv()", s, recvrc);
3037  Bool_t bad = kTRUE;
3038  if (recvrc == -5) {
3039  // Broken connection: try reconnection
3040  if (fCurrentMonitor) fCurrentMonitor->Remove(s);
3041  if (s->Reconnect() == 0) {
3042  if (fCurrentMonitor) fCurrentMonitor->Add(s);
3043  bad = kFALSE;
3044  }
3045  }
3046  if (bad)
3047  MarkBad(s, "problems receiving a message in TProof::CollectInputFrom(...)");
3048  // Ignore this wake up
3049  return -1;
3050  }
3051  if (!mess) {
3052  // we get here in case the remote server died
3053  MarkBad(s, "undefined message in TProof::CollectInputFrom(...)");
3054  return -1;
3055  }
3056  Int_t rc = 0;
3057 
3058  Int_t what = mess->What();
3059  TSlave *sl = FindSlave(s);
3060  rc = HandleInputMessage(sl, mess, deactonfail);
3061  if (rc == 1 && (endtype >= 0) && (what != endtype))
3062  // This message was for the base monitor in recursive case
3063  rc = 2;
3064 
3065  // We are done successfully
3066  return rc;
3067 }
3068 
3069 ////////////////////////////////////////////////////////////////////////////////
3070 /// Analyze the received message.
3071 /// Returns 0 on success (1 if this the last message from this socket), -1 if
3072 /// any failure occurs.
3073 
3074 Int_t TProof::HandleInputMessage(TSlave *sl, TMessage *mess, Bool_t deactonfail)
3075 {
3076  char str[512];
3077  TObject *obj;
3078  Int_t rc = 0;
3079 
3080  if (!mess || !sl) {
3081  Warning("HandleInputMessage", "given an empty message or undefined worker");
3082  return -1;
3083  }
3084  Bool_t delete_mess = kTRUE;
3085  TSocket *s = sl->GetSocket();
3086  if (!s) {
3087  Warning("HandleInputMessage", "worker socket is undefined");
3088  return -1;
3089  }
3090 
3091  // The message type
3092  Int_t what = mess->What();
3093 
3094  PDB(kCollect,3)
3095  Info("HandleInputMessage", "got type %d from '%s'", what, sl->GetOrdinal());
3096 
3097  switch (what) {
3098 
3099  case kMESS_OK:
3100  // Add the message to the list
3101  fRecvMessages->Add(mess);
3102  delete_mess = kFALSE;
3103  break;
3104 
3105  case kMESS_OBJECT:
3106  if (fPlayer) fPlayer->HandleRecvHisto(mess);
3107  break;
3108 
3109  case kPROOF_FATAL:
3110  { TString msg;
3111  if ((mess->BufferSize() > mess->Length()))
3112  (*mess) >> msg;
3113  if (msg.IsNull()) {
3114  MarkBad(s, "received kPROOF_FATAL");
3115  } else {
3116  MarkBad(s, msg);
3117  }
3118  }
3119  if (fProgressDialogStarted) {
3120  // Finalize the progress dialog
3121  Emit("StopProcess(Bool_t)", kTRUE);
3122  }
3123  break;
3124 
3125  case kPROOF_STOP:
3126  // Stop collection from this worker
3127  Info("HandleInputMessage", "received kPROOF_STOP from %s: disabling any further collection this worker",
3128  sl->GetOrdinal());
3129  rc = 1;
3130  break;
3131 
3132  case kPROOF_GETTREEHEADER:
3133  // Add the message to the list
3134  fRecvMessages->Add(mess);
3135  delete_mess = kFALSE;
3136  rc = 1;
3137  break;
3138 
3139  case kPROOF_TOUCH:
3140  // send a request for touching the remote admin file
3141  {
3142  sl->Touch();
3143  }
3144  break;
3145 
3146  case kPROOF_GETOBJECT:
3147  // send slave object it asks for
3148  mess->ReadString(str, sizeof(str));
3149  obj = gDirectory->Get(str);
3150  if (obj)
3151  s->SendObject(obj);
3152  else
3153  s->Send(kMESS_NOTOK);
3154  break;
3155 
3156  case kPROOF_GETPACKET:
3157  {
3158  PDB(kGlobal,2)
3159  Info("HandleInputMessage","%s: kPROOF_GETPACKET", sl->GetOrdinal());
3160  TDSetElement *elem = 0;
3161  elem = fPlayer ? fPlayer->GetNextPacket(sl, mess) : 0;
3162 
3163  if (elem != (TDSetElement*) -1) {
3164  TMessage answ(kPROOF_GETPACKET);
3165  answ << elem;
3166  s->Send(answ);
3167 
3168  while (fWaitingSlaves != 0 && fWaitingSlaves->GetSize()) {
3169  TPair *p = (TPair*) fWaitingSlaves->First();
3170  s = (TSocket*) p->Key();
3171  TMessage *m = (TMessage*) p->Value();
3172 
3173  elem = fPlayer ? fPlayer->GetNextPacket(sl, m) : 0;
3174  if (elem != (TDSetElement*) -1) {
3175  TMessage a(kPROOF_GETPACKET);
3176  a << elem;
3177  s->Send(a);
3178  // remove has to happen via Links because TPair does not have
3179  // a Compare() function and therefore RemoveFirst() and
3180  // Remove(TObject*) do not work
3181  fWaitingSlaves->Remove(fWaitingSlaves->FirstLink());
3182  delete p;
3183  delete m;
3184  } else {
3185  break;
3186  }
3187  }
3188  } else {
3189  if (fWaitingSlaves == 0) fWaitingSlaves = new TList;
3190  fWaitingSlaves->Add(new TPair(s, mess));
3191  delete_mess = kFALSE;
3192  }
3193  }
3194  break;
3195 
3196  case kPROOF_LOGFILE:
3197  {
3198  Int_t size;
3199  (*mess) >> size;
3200  PDB(kGlobal,2)
3201  Info("HandleInputMessage","%s: kPROOF_LOGFILE: size: %d", sl->GetOrdinal(), size);
3202  RecvLogFile(s, size);
3203  }
3204  break;
3205 
3206  case kPROOF_LOGDONE:
3207  (*mess) >> sl->fStatus >> sl->fParallel;
3208  PDB(kCollect,2)
3209  Info("HandleInputMessage","%s: kPROOF_LOGDONE: status %d parallel %d",
3210  sl->GetOrdinal(), sl->fStatus, sl->fParallel);
3211  if (sl->fStatus != 0) {
3212  // Return last nonzero status
3213  fStatus = sl->fStatus;
3214  // Deactivate the worker, if required
3215  if (deactonfail) DeactivateWorker(sl->fOrdinal);
3216  }
3217  // Remove from the workers-ready list
3218  if (fWrksOutputReady && fWrksOutputReady->FindObject(sl)) {
3219  sl->ResetBit(TSlave::kOutputRequested);
3220  fWrksOutputReady->Remove(sl);
3221  }
3222  rc = 1;
3223  break;
3224 
3225  case kPROOF_GETSTATS:
3226  {
3227  (*mess) >> sl->fBytesRead >> sl->fRealTime >> sl->fCpuTime
3228  >> sl->fWorkDir >> sl->fProofWorkDir;
3229  PDB(kCollect,2)
3230  Info("HandleInputMessage", "kPROOF_GETSTATS: %s", sl->fWorkDir.Data());
3231  TString img;
3232  if ((mess->BufferSize() > mess->Length()))
3233  (*mess) >> img;
3234  // Set image
3235  if (img.IsNull()) {
3236  if (sl->fImage.IsNull())
3237  sl->fImage.Form("%s:%s", TUrl(sl->fName).GetHostFQDN(),
3238  sl->fProofWorkDir.Data());
3239  } else {
3240  sl->fImage = img;
3241  }
3242  PDB(kGlobal,2)
3243  Info("HandleInputMessage",
3244  "kPROOF_GETSTATS:%s image: %s", sl->GetOrdinal(), sl->GetImage());
3245 
3246  fBytesRead += sl->fBytesRead;
3247  fRealTime += sl->fRealTime;
3248  fCpuTime += sl->fCpuTime;
3249  rc = 1;
3250  }
3251  break;
3252 
3253  case kPROOF_GETPARALLEL:
3254  {
3255  Bool_t async = kFALSE;
3256  (*mess) >> sl->fParallel;
3257  if ((mess->BufferSize() > mess->Length()))
3258  (*mess) >> async;
3259  rc = (async) ? 0 : 1;
3260  }
3261  break;
3262 
3263  case kPROOF_CHECKFILE:
3264  { // New servers (>= 5.22) send the status
3265  if ((mess->BufferSize() > mess->Length())) {
3266  (*mess) >> fCheckFileStatus;
3267  } else {
3268  // Form old servers this meant success (failure was signaled with the
3269  // dangerous kPROOF_FATAL)
3270  fCheckFileStatus = 1;
3271  }
3272  rc = 1;
3273  }
3274  break;
3275 
3276  case kPROOF_SENDFILE:
3277  { // New server: signals ending of sendfile operation
3278  rc = 1;
3279  }
3280  break;
3281 
3282  case kPROOF_PACKAGE_LIST:
3283  {
3284  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_PACKAGE_LIST: enter");
3285  Int_t type = 0;
3286  (*mess) >> type;
3287  switch (type) {
3288  case TProof::kListEnabledPackages:
3289  SafeDelete(fEnabledPackages);
3290  fEnabledPackages = (TList *) mess->ReadObject(TList::Class());
3291  if (fEnabledPackages) {
3292  fEnabledPackages->SetOwner();
3293  } else {
3294  Error("HandleInputMessage",
3295  "kPROOF_PACKAGE_LIST: kListEnabledPackages: TList not found in message!");
3296  }
3297  break;
3298  case TProof::kListPackages:
3299  SafeDelete(fAvailablePackages);
3300  fAvailablePackages = (TList *) mess->ReadObject(TList::Class());
3301  if (fAvailablePackages) {
3302  fAvailablePackages->SetOwner();
3303  } else {
3304  Error("HandleInputMessage",
3305  "kPROOF_PACKAGE_LIST: kListPackages: TList not found in message!");
3306  }
3307  break;
3308  default:
3309  Error("HandleInputMessage", "kPROOF_PACKAGE_LIST: unknown type: %d", type);
3310  }
3311  }
3312  break;
3313 
3314  case kPROOF_SENDOUTPUT:
3315  {
3316  // We start measuring the merging time
3317  fPlayer->SetMerging();
3318 
3319  // Worker is ready to send output: make sure the relevant bit is reset
3320  sl->ResetBit(TSlave::kOutputRequested);
3321  PDB(kGlobal,2)
3322  Info("HandleInputMessage","kPROOF_SENDOUTPUT: enter (%s)", sl->GetOrdinal());
3323  // Create the list if not yet done
3324  if (!fWrksOutputReady) {
3325  fWrksOutputReady = new TList;
3326  fWrksOutputReady->SetOwner(kFALSE);
3327  }
3328  fWrksOutputReady->Add(sl);
3329  }
3330  break;
3331 
3332  case kPROOF_OUTPUTOBJECT:
3333  {
3334  // We start measuring the merging time
3335  fPlayer->SetMerging();
3336 
3337  PDB(kGlobal,2)
3338  Info("HandleInputMessage","kPROOF_OUTPUTOBJECT: enter");
3339  Int_t type = 0;
3340  const char *prefix = gProofServ ? gProofServ->GetPrefix() : "Lite-0";
3341  if (!TestBit(TProof::kIsClient) && !fMergersSet && !fFinalizationRunning) {
3342  Info("HandleInputMessage", "finalization on %s started ...", prefix);
3343  fFinalizationRunning = kTRUE;
3344  }
3345 
3346  while ((mess->BufferSize() > mess->Length())) {
3347  (*mess) >> type;
3348  // If a query result header, add it to the player list
3349  if (fPlayer) {
3350  if (type == 0) {
3351  // Retrieve query result instance (output list not filled)
3352  TQueryResult *pq =
3353  (TQueryResult *) mess->ReadObject(TQueryResult::Class());
3354  if (pq) {
3355  // Add query to the result list in TProofPlayer
3356  fPlayer->AddQueryResult(pq);
3357  fPlayer->SetCurrentQuery(pq);
3358  // And clear the output list, as we start merging a new set of results
3359  if (fPlayer->GetOutputList())
3360  fPlayer->GetOutputList()->Clear();
3361  // Add the unique query tag as TNamed object to the input list
3362  // so that it is available in TSelectors for monitoring
3363  TString qid = TString::Format("%s:%s",pq->GetTitle(),pq->GetName());
3364  if (fPlayer->GetInputList()->FindObject("PROOF_QueryTag"))
3365  fPlayer->GetInputList()->Remove(fPlayer->GetInputList()->FindObject("PROOF_QueryTag"));
3366  fPlayer->AddInput(new TNamed("PROOF_QueryTag", qid.Data()));
3367  } else {
3368  Warning("HandleInputMessage","kPROOF_OUTPUTOBJECT: query result missing");
3369  }
3370  } else if (type > 0) {
3371  // Read object
3372  TObject *o = mess->ReadObject(TObject::Class());
3373  // Increment counter on the client side
3374  fMergePrg.IncreaseIdx();
3375  TString msg;
3376  Bool_t changed = kFALSE;
3377  msg.Form("%s: merging output objects ... %s", prefix, fMergePrg.Export(changed));
3378  if (gProofServ) {
3379  gProofServ->SendAsynMessage(msg.Data(), kFALSE);
3380  } else if (IsTty() || changed) {
3381  fprintf(stderr, "%s\r", msg.Data());
3382  }
3383  // Add or merge it
3384  if ((fPlayer->AddOutputObject(o) == 1)) {
3385  // Remove the object if it has been merged
3386  SafeDelete(o);
3387  }
3388  if (type > 1) {
3389  // Update the merger progress info
3390  fMergePrg.DecreaseNWrks();
3391  if (TestBit(TProof::kIsClient) && !IsLite()) {
3392  // In PROOFLite this has to be done once only in TProofLite::Process
3393  TQueryResult *pq = fPlayer->GetCurrentQuery();
3394  if (pq) {
3395  pq->SetOutputList(fPlayer->GetOutputList(), kFALSE);
3396  // Add input objects (do not override remote settings, if any)
3397  TObject *xo = 0;
3398  TIter nxin(fPlayer->GetInputList());
3399  // Servers prior to 5.28/00 do not create the input list in the TQueryResult
3400  if (!pq->GetInputList()) pq->SetInputList(new TList());
3401  while ((xo = nxin()))
3402  if (!pq->GetInputList()->FindObject(xo->GetName()))
3403  pq->AddInput(xo->Clone());
3404  // If the last object, notify the GUI that the result arrived
3405  QueryResultReady(TString::Format("%s:%s", pq->GetTitle(), pq->GetName()));
3406  }
3407  // Processing is over
3408  UpdateDialog();
3409  }
3410  }
3411  }
3412  } else {
3413  Warning("HandleInputMessage", "kPROOF_OUTPUTOBJECT: player undefined!");
3414  }
3415  }
3416  }
3417  break;
3418 
3419  case kPROOF_OUTPUTLIST:
3420  {
3421  // We start measuring the merging time
3422 
3423  PDB(kGlobal,2)
3424  Info("HandleInputMessage","%s: kPROOF_OUTPUTLIST: enter", sl->GetOrdinal());
3425  TList *out = 0;
3426  if (fPlayer) {
3427  fPlayer->SetMerging();
3428  if (TestBit(TProof::kIsMaster) || fProtocol < 7) {
3429  out = (TList *) mess->ReadObject(TList::Class());
3430  } else {
3431  TQueryResult *pq =
3432  (TQueryResult *) mess->ReadObject(TQueryResult::Class());
3433  if (pq) {
3434  // Add query to the result list in TProofPlayer
3435  fPlayer->AddQueryResult(pq);
3436  fPlayer->SetCurrentQuery(pq);
3437  // To avoid accidental cleanups from anywhere else
3438  // remove objects from gDirectory and clone the list
3439  out = pq->GetOutputList();
3440  CleanGDirectory(out);
3441  out = (TList *) out->Clone();
3442  // Notify the GUI that the result arrived
3443  QueryResultReady(TString::Format("%s:%s", pq->GetTitle(), pq->GetName()));
3444  } else {
3445  PDB(kGlobal,2)
3446  Info("HandleInputMessage",
3447  "%s: kPROOF_OUTPUTLIST: query result missing", sl->GetOrdinal());
3448  }
3449  }
3450  if (out) {
3451  out->SetOwner();
3452  fPlayer->AddOutput(out); // Incorporate the list
3453  SafeDelete(out);
3454  } else {
3455  PDB(kGlobal,2)
3456  Info("HandleInputMessage",
3457  "%s: kPROOF_OUTPUTLIST: outputlist is empty", sl->GetOrdinal());
3458  }
3459  } else {
3460  Warning("HandleInputMessage",
3461  "%s: kPROOF_OUTPUTLIST: player undefined!", sl->GetOrdinal());
3462  }
3463  // On clients at this point processing is over
3464  if (TestBit(TProof::kIsClient) && !IsLite())
3465  UpdateDialog();
3466  }
3467  break;
3468 
3469  case kPROOF_QUERYLIST:
3470  {
3471  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_QUERYLIST: enter");
3472  (*mess) >> fOtherQueries >> fDrawQueries;
3473  if (fQueries) {
3474  fQueries->Delete();
3475  delete fQueries;
3476  fQueries = 0;
3477  }
3478  fQueries = (TList *) mess->ReadObject(TList::Class());
3479  }
3480  break;
3481 
3482  case kPROOF_RETRIEVE:
3483  {
3484  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_RETRIEVE: enter");
3485  TQueryResult *pq =
3486  (TQueryResult *) mess->ReadObject(TQueryResult::Class());
3487  if (pq && fPlayer) {
3488  fPlayer->AddQueryResult(pq);
3489  // Notify the GUI that the result arrived
3490  QueryResultReady(TString::Format("%s:%s", pq->GetTitle(), pq->GetName()));
3491  } else {
3492  PDB(kGlobal,2)
3493  Info("HandleInputMessage",
3494  "kPROOF_RETRIEVE: query result missing or player undefined");
3495  }
3496  }
3497  break;
3498 
3499  case kPROOF_MAXQUERIES:
3500  {
3501  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_MAXQUERIES: enter");
3502  Int_t max = 0;
3503 
3504  (*mess) >> max;
3505  Printf("Number of queries fully kept remotely: %d", max);
3506  }
3507  break;
3508 
3509  case kPROOF_SERVERSTARTED:
3510  {
3511  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_SERVERSTARTED: enter");
3512 
3513  UInt_t tot = 0, done = 0;
3514  TString action;
3515  Bool_t st = kTRUE;
3516 
3517  (*mess) >> action >> tot >> done >> st;
3518 
3519  if (TestBit(TProof::kIsClient)) {
3520  if (tot) {
3521  TString type = (action.Contains("submas")) ? "submasters"
3522  : "workers";
3523  Int_t frac = (Int_t) (done*100.)/tot;
3524  char msg[512] = {0};
3525  if (frac >= 100) {
3526  snprintf(msg, 512, "%s: OK (%d %s) \n",
3527  action.Data(),tot, type.Data());
3528  } else {
3529  snprintf(msg, 512, "%s: %d out of %d (%d %%)\r",
3530  action.Data(), done, tot, frac);
3531  }
3532  if (fSync)
3533  fprintf(stderr,"%s", msg);
3534  else
3535  NotifyLogMsg(msg, 0);
3536  }
3537  // Notify GUIs
3538  StartupMessage(action.Data(), st, (Int_t)done, (Int_t)tot);
3539  } else {
3540 
3541  // Just send the message one level up
3542  TMessage m(kPROOF_SERVERSTARTED);
3543  m << action << tot << done << st;
3544  gProofServ->GetSocket()->Send(m);
3545  }
3546  }
3547  break;
3548 
3549  case kPROOF_DATASET_STATUS:
3550  {
3551  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_DATASET_STATUS: enter");
3552 
3553  UInt_t tot = 0, done = 0;
3554  TString action;
3555  Bool_t st = kTRUE;
3556 
3557  (*mess) >> action >> tot >> done >> st;
3558 
3559  if (TestBit(TProof::kIsClient)) {
3560  if (tot) {
3561  TString type = "files";
3562  Int_t frac = (Int_t) (done*100.)/tot;
3563  char msg[512] = {0};
3564  if (frac >= 100) {
3565  snprintf(msg, 512, "%s: OK (%d %s) \n",
3566  action.Data(),tot, type.Data());
3567  } else {
3568  snprintf(msg, 512, "%s: %d out of %d (%d %%)\r",
3569  action.Data(), done, tot, frac);
3570  }
3571  if (fSync)
3572  fprintf(stderr,"%s", msg);
3573  else
3574  NotifyLogMsg(msg, 0);
3575  }
3576  // Notify GUIs
3577  DataSetStatus(action.Data(), st, (Int_t)done, (Int_t)tot);
3578  } else {
3579 
3580  // Just send the message one level up
3581  TMessage m(kPROOF_DATASET_STATUS);
3582  m << action << tot << done << st;
3583  gProofServ->GetSocket()->Send(m);
3584  }
3585  }
3586  break;
3587 
3588  case kPROOF_STARTPROCESS:
3589  {
3590  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_STARTPROCESS: enter");
3591 
3592  // For Proof-Lite this variable is the number of workers and is set
3593  // by the player
3594  if (!IsLite()) {
3595  fNotIdle = 1;
3596  fIsWaiting = kFALSE;
3597  }
3598 
3599  // Redirect the output, if needed
3600  fRedirLog = (fSync) ? fRedirLog : kTRUE;
3601 
3602  // The signal is used on masters by XrdProofdProtocol to catch
3603  // the start of processing; on clients it allows to update the
3604  // progress dialog
3605  if (!TestBit(TProof::kIsMaster)) {
3606 
3607  // This is the end of preparation
3608  fQuerySTW.Stop();
3609  fPrepTime = fQuerySTW.RealTime();
3610  PDB(kGlobal,2) Info("HandleInputMessage","Preparation time: %f s", fPrepTime);
3611 
3612  TString selec;
3613  Int_t dsz = -1;
3614  Long64_t first = -1, nent = -1;
3615  (*mess) >> selec >> dsz >> first >> nent;
3616  // Start or reset the progress dialog
3617  if (!gROOT->IsBatch()) {
3618  if (fProgressDialog &&
3619  !TestBit(kUsingSessionGui) && TestBit(kUseProgressDialog)) {
3620  if (!fProgressDialogStarted) {
3621  fProgressDialog->ExecPlugin(5, this,
3622  selec.Data(), dsz, first, nent);
3623  fProgressDialogStarted = kTRUE;
3624  } else {
3625  ResetProgressDialog(selec, dsz, first, nent);
3626  }
3627  }
3628  ResetBit(kUsingSessionGui);
3629  }
3630  }
3631  }
3632  break;
3633 
3634  case kPROOF_ENDINIT:
3635  {
3636  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_ENDINIT: enter");
3637 
3638  if (TestBit(TProof::kIsMaster)) {
3639  if (fPlayer)
3640  fPlayer->SetInitTime();
3641  }
3642  }
3643  break;
3644 
3645  case kPROOF_SETIDLE:
3646  {
3647  PDB(kGlobal,2)
3648  Info("HandleInputMessage","kPROOF_SETIDLE from '%s': enter (%d)", sl->GetOrdinal(), fNotIdle);
3649 
3650  // The session is idle
3651  if (IsLite()) {
3652  if (fNotIdle > 0) {
3653  fNotIdle--;
3654  PDB(kGlobal,2)
3655  Info("HandleInputMessage", "%s: got kPROOF_SETIDLE", sl->GetOrdinal());
3656  } else {
3657  Warning("HandleInputMessage",
3658  "%s: got kPROOF_SETIDLE but no running workers ! protocol error?",
3659  sl->GetOrdinal());
3660  }
3661  } else {
3662  fNotIdle = 0;
3663  // Check if the query has been enqueued
3664  if ((mess->BufferSize() > mess->Length()))
3665  (*mess) >> fIsWaiting;
3666  }
3667  }
3668  break;
3669 
3670  case kPROOF_QUERYSUBMITTED:
3671  {
3672  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_QUERYSUBMITTED: enter");
3673 
3674  // We have received the sequential number
3675  (*mess) >> fSeqNum;
3676  Bool_t sync = fSync;
3677  if ((mess->BufferSize() > mess->Length()))
3678  (*mess) >> sync;
3679  if (sync != fSync && fSync) {
3680  // The server required to switch to asynchronous mode
3681  Activate();
3682  fSync = kFALSE;
3683  }
3684  DisableGoAsyn();
3685  // Check if the query has been enqueued
3686  fIsWaiting = kTRUE;
3687  // For Proof-Lite this variable is the number of workers and is set by the player
3688  if (!IsLite())
3689  fNotIdle = 1;
3690 
3691  rc = 1;
3692  }
3693  break;
3694 
3695  case kPROOF_SESSIONTAG:
3696  {
3697  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_SESSIONTAG: enter");
3698 
3699  // We have received the unique tag and save it as name of this object
3700  TString stag;
3701  (*mess) >> stag;
3702  SetName(stag);
3703  // In the TSlave object
3704  sl->SetSessionTag(stag);
3705  // Server may have also sent the group
3706  if ((mess->BufferSize() > mess->Length()))
3707  (*mess) >> fGroup;
3708  // Server may have also sent the user
3709  if ((mess->BufferSize() > mess->Length())) {
3710  TString usr;
3711  (*mess) >> usr;
3712  if (!usr.IsNull()) fUrl.SetUser(usr.Data());
3713  }
3714  }
3715  break;
3716 
3717  case kPROOF_FEEDBACK:
3718  {
3719  PDB(kGlobal,2)
3720  Info("HandleInputMessage","kPROOF_FEEDBACK: enter");
3721  TList *out = (TList *) mess->ReadObject(TList::Class());
3722  out->SetOwner();
3723  if (fPlayer)
3724  fPlayer->StoreFeedback(sl, out); // Adopts the list
3725  else
3726  // Not yet ready: stop collect asap
3727  rc = 1;
3728  }
3729  break;
3730 
3731  case kPROOF_AUTOBIN:
3732  {
3733  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_AUTOBIN: enter");
3734 
3735  TString name;
3736  Double_t xmin, xmax, ymin, ymax, zmin, zmax;
3737 
3738  (*mess) >> name >> xmin >> xmax >> ymin >> ymax >> zmin >> zmax;
3739 
3740  if (fPlayer) fPlayer->UpdateAutoBin(name,xmin,xmax,ymin,ymax,zmin,zmax);
3741 
3742  TMessage answ(kPROOF_AUTOBIN);
3743 
3744  answ << name << xmin << xmax << ymin << ymax << zmin << zmax;
3745 
3746  s->Send(answ);
3747  }
3748  break;
3749 
3750  case kPROOF_PROGRESS:
3751  {
3752  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_PROGRESS: enter");
3753 
3754  if (GetRemoteProtocol() > 25) {
3755  // New format
3756  TProofProgressInfo *pi = 0;
3757  (*mess) >> pi;
3758  fPlayer->Progress(sl,pi);
3759  } else if (GetRemoteProtocol() > 11) {
3760  Long64_t total, processed, bytesread;
3761  Float_t initTime, procTime, evtrti, mbrti;
3762  (*mess) >> total >> processed >> bytesread
3763  >> initTime >> procTime
3764  >> evtrti >> mbrti;
3765  if (fPlayer)
3766  fPlayer->Progress(sl, total, processed, bytesread,
3767  initTime, procTime, evtrti, mbrti);
3768 
3769  } else {
3770  // Old format
3771  Long64_t total, processed;
3772  (*mess) >> total >> processed;
3773  if (fPlayer)
3774  fPlayer->Progress(sl, total, processed);
3775  }
3776  }
3777  break;
3778 
3779  case kPROOF_STOPPROCESS:
3780  {
3781  // This message is sent from a worker that finished processing.
3782  // We determine whether it was asked to finish by the
3783  // packetizer or stopped during processing a packet
3784  // (by TProof::RemoveWorkers() or by an external signal).
3785  // In the later case call packetizer->MarkBad.
3786  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_STOPPROCESS: enter");
3787 
3788  Long64_t events = 0;
3789  Bool_t abort = kFALSE;
3790  TProofProgressStatus *status = 0;
3791 
3792  if ((mess->BufferSize() > mess->Length()) && (fProtocol > 18)) {
3793  (*mess) >> status >> abort;
3794  } else if ((mess->BufferSize() > mess->Length()) && (fProtocol > 8)) {
3795  (*mess) >> events >> abort;
3796  } else {
3797  (*mess) >> events;
3798  }
3799  if (fPlayer) {
3800  if (fProtocol > 18) {
3801  TList *listOfMissingFiles = 0;
3802  if (!(listOfMissingFiles = (TList *)GetOutput("MissingFiles"))) {
3803  listOfMissingFiles = new TList();
3804  listOfMissingFiles->SetName("MissingFiles");
3805  if (fPlayer)
3806  fPlayer->AddOutputObject(listOfMissingFiles);
3807  }
3808  if (fPlayer->GetPacketizer()) {
3809  Int_t ret =
3810  fPlayer->GetPacketizer()->AddProcessed(sl, status, 0, &listOfMissingFiles);
3811  if (ret > 0)
3812  fPlayer->GetPacketizer()->MarkBad(sl, status, &listOfMissingFiles);
3813  // This object is now owned by the packetizer
3814  status = 0;
3815  }
3816  if (status) fPlayer->AddEventsProcessed(status->GetEntries());
3817  } else {
3818  fPlayer->AddEventsProcessed(events);
3819  }
3820  }
3821  SafeDelete(status);
3822  if (!TestBit(TProof::kIsMaster))
3823  Emit("StopProcess(Bool_t)", abort);
3824  break;
3825  }
3826 
3827  case kPROOF_SUBMERGER:
3828  {
3829  PDB(kGlobal,2) Info("HandleInputMessage", "kPROOF_SUBMERGER: enter");
3830  HandleSubmerger(mess, sl);
3831  }
3832  break;
3833 
3834  case kPROOF_GETSLAVEINFO:
3835  {
3836  PDB(kGlobal,2) Info("HandleInputMessage", "kPROOF_GETSLAVEINFO: enter");
3837 
3838  Bool_t active = (GetListOfActiveSlaves()->FindObject(sl) != 0);
3839  Bool_t bad = (GetListOfBadSlaves()->FindObject(sl) != 0);
3840  TList* tmpinfo = 0;
3841  (*mess) >> tmpinfo;
3842  if (tmpinfo == 0) {
3843  Error("HandleInputMessage", "kPROOF_GETSLAVEINFO: no list received!");
3844  } else {
3845  tmpinfo->SetOwner(kFALSE);
3846  Int_t nentries = tmpinfo->GetSize();
3847  for (Int_t i=0; i<nentries; i++) {
3848  TSlaveInfo* slinfo =
3849  dynamic_cast<TSlaveInfo*>(tmpinfo->At(i));
3850  if (slinfo) {
3851  // If PROOF-Lite
3852  if (IsLite()) slinfo->fHostName = gSystem->HostName();
3853  // Check if we have already a instance for this worker
3854  TIter nxw(fSlaveInfo);
3855  TSlaveInfo *ourwi = 0;
3856  while ((ourwi = (TSlaveInfo *)nxw())) {
3857  if (!strcmp(ourwi->GetOrdinal(), slinfo->GetOrdinal())) {
3858  ourwi->SetSysInfo(slinfo->GetSysInfo());
3859  ourwi->fHostName = slinfo->GetName();
3860  if (slinfo->GetDataDir() && (strlen(slinfo->GetDataDir()) > 0))
3861  ourwi->fDataDir = slinfo->GetDataDir();
3862  break;
3863  }
3864  }
3865  if (!ourwi) {
3866  fSlaveInfo->Add(slinfo);
3867  } else {
3868  slinfo = ourwi;
3869  }
3870  if (slinfo->fStatus != TSlaveInfo::kBad) {
3871  if (!active) slinfo->SetStatus(TSlaveInfo::kNotActive);
3872  if (bad) slinfo->SetStatus(TSlaveInfo::kBad);
3873  }
3874  if (sl->GetMsd() && (strlen(sl->GetMsd()) > 0))
3875  slinfo->fMsd = sl->GetMsd();
3876  }
3877  }
3878  delete tmpinfo;
3879  rc = 1;
3880  }
3881  }
3882  break;
3883 
3884  case kPROOF_VALIDATE_DSET:
3885  {
3886  PDB(kGlobal,2)
3887  Info("HandleInputMessage", "kPROOF_VALIDATE_DSET: enter");
3888  TDSet* dset = 0;
3889  (*mess) >> dset;
3890  if (!fDSet)
3891  Error("HandleInputMessage", "kPROOF_VALIDATE_DSET: fDSet not set");
3892  else
3893  fDSet->Validate(dset);
3894  delete dset;
3895  }
3896  break;
3897 
3898  case kPROOF_DATA_READY:
3899  {
3900  PDB(kGlobal,2) Info("HandleInputMessage", "kPROOF_DATA_READY: enter");
3901  Bool_t dataready = kFALSE;
3902  Long64_t totalbytes, bytesready;
3903  (*mess) >> dataready >> totalbytes >> bytesready;
3904  fTotalBytes += totalbytes;
3905  fBytesReady += bytesready;
3906  if (dataready == kFALSE) fDataReady = dataready;
3907  }
3908  break;
3909 
3910  case kPROOF_PING:
3911  // do nothing (ping is already acknowledged)
3912  break;
3913 
3914  case kPROOF_MESSAGE:
3915  {
3916  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_MESSAGE: enter");
3917 
3918  // We have received the unique tag and save it as name of this object
3919  TString msg;
3920  (*mess) >> msg;
3921  Bool_t lfeed = kTRUE;
3922  if ((mess->BufferSize() > mess->Length()))
3923  (*mess) >> lfeed;
3924 
3925  if (TestBit(TProof::kIsClient)) {
3926 
3927  if (fSync) {
3928  // Notify locally
3929  fprintf(stderr,"%s%c", msg.Data(), (lfeed ? '\n' : '\r'));
3930  } else {
3931  // Notify locally taking care of redirection, windows logs, ...
3932  NotifyLogMsg(msg, (lfeed ? "\n" : "\r"));
3933  }
3934  } else {
3935 
3936  // The message is logged for debugging purposes.
3937  fprintf(stderr,"%s%c", msg.Data(), (lfeed ? '\n' : '\r'));
3938  if (gProofServ) {
3939  // We hide it during normal operations
3940  gProofServ->FlushLogFile();
3941 
3942  // And send the message one level up
3943  gProofServ->SendAsynMessage(msg, lfeed);
3944  }
3945  }
3946  }
3947  break;
3948 
3949  case kPROOF_VERSARCHCOMP:
3950  {
3951  TString vac;
3952  (*mess) >> vac;
3953  PDB(kGlobal,2) Info("HandleInputMessage","kPROOF_VERSARCHCOMP: %s", vac.Data());
3954  Int_t from = 0;
3955  TString vers, archcomp;
3956  if (vac.Tokenize(vers, from, "|"))
3957  vac.Tokenize(archcomp, from, "|");
3958  sl->SetArchCompiler(archcomp);
3959  vers.ReplaceAll(":","|");
3960  sl->SetROOTVersion(vers);
3961  }
3962  break;
3963 
3964  default:
3965  {
3966  Error("HandleInputMessage", "unknown command received from '%s' (what = %d)",
3967  sl->GetOrdinal(), what);
3968  }
3969  break;
3970  }
3971 
3972  // Cleanup
3973  if (delete_mess)
3974  delete mess;
3975 
3976  // We are done successfully
3977  return rc;
3978 }
3979 
3980 ////////////////////////////////////////////////////////////////////////////////
3981 /// Process a message of type kPROOF_SUBMERGER
3982 
3983 void TProof::HandleSubmerger(TMessage *mess, TSlave *sl)
3984 {
3985  // Message sub-type
3986  Int_t type = 0;
3987  (*mess) >> type;
3988  TSocket *s = sl->GetSocket();
3989 
3990  switch (type) {
3991  case kOutputSent:
3992  {
3993  if (IsEndMaster()) {
3994  Int_t merger_id = -1;
3995  (*mess) >> merger_id;
3996 
3997  PDB(kSubmerger, 2)
3998  Info("HandleSubmerger", "kOutputSent: Worker %s:%d:%s had sent its output to merger #%d",
3999  sl->GetName(), sl->GetPort(), sl->GetOrdinal(), merger_id);
4000 
4001  if (!fMergers || fMergers->GetSize() <= merger_id) {
4002  Error("HandleSubmerger", "kOutputSize: #%d not in list ", merger_id);
4003  break;
4004  }
4005  TMergerInfo * mi = (TMergerInfo *) fMergers->At(merger_id);
4006  mi->SetMergedWorker();
4007  if (mi->AreAllWorkersMerged()) {
4008  mi->Deactivate();
4009  if (GetActiveMergersCount() == 0) {
4010  fMergers->Clear();
4011  delete fMergers;
4012  fMergersSet = kFALSE;
4013  fMergersCount = -1;
4014  fLastAssignedMerger = 0;
4015  PDB(kSubmerger, 2) Info("HandleSubmerger", "all mergers removed ... ");
4016  }
4017  }
4018  } else {
4019  PDB(kSubmerger, 2) Error("HandleSubmerger","kOutputSent: received not on endmaster!");
4020  }
4021  }
4022  break;
4023 
4024  case kMergerDown:
4025  {
4026  Int_t merger_id = -1;
4027  (*mess) >> merger_id;
4028 
4029  PDB(kSubmerger, 2) Info("HandleSubmerger", "kMergerDown: #%d ", merger_id);
4030 
4031  if (!fMergers || fMergers->GetSize() <= merger_id) {
4032  Error("HandleSubmerger", "kMergerDown: #%d not in list ", merger_id);
4033  break;
4034  }
4035 
4036  TMergerInfo * mi = (TMergerInfo *) fMergers->At(merger_id);
4037  if (!mi->IsActive()) {
4038  break;
4039  } else {
4040  mi->Deactivate();
4041  }
4042 
4043  // Stop the invalid merger in the case it is still listening
4044  TMessage stop(kPROOF_SUBMERGER);
4045  stop << Int_t(kStopMerging);
4046  stop << 0;
4047  s->Send(stop);
4048 
4049  // Ask for results from merger (only original results from this node as worker are returned)
4050  AskForOutput(mi->GetMerger());
4051 
4052  // Ask for results from all workers assigned to this merger
4053  TIter nxo(mi->GetWorkers());
4054  TObject * o = 0;
4055  while ((o = nxo())) {
4056  AskForOutput((TSlave *)o);
4057  }
4058  PDB(kSubmerger, 2) Info("HandleSubmerger", "kMergerDown:%d: exit", merger_id);
4059  }
4060  break;
4061 
4062  case kOutputSize:
4063  {
4064  if (IsEndMaster()) {
4065  PDB(kSubmerger, 2)
4066  Info("HandleSubmerger", "worker %s reported as finished ", sl->GetOrdinal());
4067 
4068  const char *prefix = gProofServ ? gProofServ->GetPrefix() : "Lite-0";
4069  if (!fFinalizationRunning) {
4070  Info("HandleSubmerger", "finalization on %s started ...", prefix);
4071  fFinalizationRunning = kTRUE;
4072  }
4073 
4074  Int_t output_size = 0;
4075  Int_t merging_port = 0;
4076  (*mess) >> output_size >> merging_port;
4077 
4078  PDB(kSubmerger, 2) Info("HandleSubmerger",
4079  "kOutputSize: Worker %s:%d:%s reports %d output objects (+ available port %d)",
4080  sl->GetName(), sl->GetPort(), sl->GetOrdinal(), output_size, merging_port);
4081  TString msg;
4082  if (!fMergersSet) {
4083 
4084  Int_t activeWorkers = fCurrentMonitor ? fCurrentMonitor->GetActive() : GetNumberOfActiveSlaves();
4085 
4086  // First pass - setting number of mergers according to user or dynamically
4087  fMergersCount = -1; // No mergers used if not set by user
4088  TParameter<Int_t> *mc = dynamic_cast<TParameter<Int_t> *>(GetParameter("PROOF_UseMergers"));
4089  if (mc) fMergersCount = mc->GetVal(); // Value set by user
4090  TParameter<Int_t> *mh = dynamic_cast<TParameter<Int_t> *>(GetParameter("PROOF_MergersByHost"));
4091  if (mh) fMergersByHost = (mh->GetVal() != 0) ? kTRUE : kFALSE; // Assign submergers by hostname
4092 
4093  // Mergers count specified by user but not valid
4094  if (fMergersCount < 0 || (fMergersCount > (activeWorkers/2) )) {
4095  msg.Form("%s: Invalid request: cannot start %d mergers for %d workers",
4096  prefix, fMergersCount, activeWorkers);
4097  if (gProofServ)
4098  gProofServ->SendAsynMessage(msg);
4099  else
4100  Printf("%s",msg.Data());
4101  fMergersCount = 0;
4102  }
4103  // Mergers count will be set dynamically
4104  if ((fMergersCount == 0) && (!fMergersByHost)) {
4105  if (activeWorkers > 1) {
4106  fMergersCount = TMath::Nint(TMath::Sqrt(activeWorkers));
4107  if (activeWorkers / fMergersCount < 2)
4108  fMergersCount = (Int_t) TMath::Sqrt(activeWorkers);
4109  }
4110  if (fMergersCount > 1)
4111  msg.Form("%s: Number of mergers set dynamically to %d (for %d workers)",
4112  prefix, fMergersCount, activeWorkers);
4113  else {
4114  msg.Form("%s: No mergers will be used for %d workers",
4115  prefix, activeWorkers);
4116  fMergersCount = -1;
4117  }
4118  if (gProofServ)
4119  gProofServ->SendAsynMessage(msg);
4120  else
4121  Printf("%s",msg.Data());
4122  } else if (fMergersByHost) {
4123  // We force mergers at host level to minimize network traffic
4124  if (activeWorkers > 1) {
4125  fMergersCount = 0;
4126  THashList hosts;
4127  TIter nxwk(fSlaves);
4128  TObject *wrk = 0;
4129  while ((wrk = nxwk())) {
4130  if (!hosts.FindObject(wrk->GetName())) {
4131  hosts.Add(new TObjString(wrk->GetName()));
4132  fMergersCount++;
4133  }
4134  }
4135  }
4136  if (fMergersCount > 1)
4137  msg.Form("%s: Number of mergers set to %d (for %d workers), one for each slave host",
4138  prefix, fMergersCount, activeWorkers);
4139  else {
4140  msg.Form("%s: No mergers will be used for %d workers",
4141  prefix, activeWorkers);
4142  fMergersCount = -1;
4143  }
4144  if (gProofServ)
4145  gProofServ->SendAsynMessage(msg);
4146  else
4147  Printf("%s",msg.Data());
4148  } else {
4149  msg.Form("%s: Number of mergers set by user to %d (for %d workers)",
4150  prefix, fMergersCount, activeWorkers);
4151  if (gProofServ)
4152  gProofServ->SendAsynMessage(msg);
4153  else
4154  Printf("%s",msg.Data());
4155  }
4156 
4157  // We started merging; we call it here because fMergersCount is still the original number
4158  // and can be saved internally
4159  fPlayer->SetMerging(kTRUE);
4160 
4161  // Update merger counters (new workers are not yet active)
4162  fMergePrg.SetNWrks(fMergersCount);
4163 
4164  if (fMergersCount > 0) {
4165 
4166  fMergers = new TList();
4167  fLastAssignedMerger = 0;
4168  // Total number of workers, which will not act as mergers ('pure workers')
4169  fWorkersToMerge = (activeWorkers - fMergersCount);
4170  // Establish the first merger
4171  if (!CreateMerger(sl, merging_port)) {
4172  // Cannot establish first merger
4173  AskForOutput(sl);
4174  fWorkersToMerge--;
4175  fMergersCount--;
4176  }
4177  if (IsLite()) fMergePrg.SetNWrks(fMergersCount);
4178  } else {
4179  AskForOutput(sl);
4180  }
4181  fMergersSet = kTRUE;
4182  } else {
4183  // Multiple pass
4184  if (fMergersCount == -1) {
4185  // No mergers. Workers send their outputs directly to master
4186  AskForOutput(sl);
4187  } else {
4188  if ((fRedirectNext > 0 ) && (!fMergersByHost)) {
4189  RedirectWorker(s, sl, output_size);
4190  fRedirectNext--;
4191  } else {
4192  Bool_t newMerger = kTRUE;
4193  if (fMergersByHost) {
4194  TIter nxmg(fMergers);
4195  TMergerInfo *mgi = 0;
4196  while ((mgi = (TMergerInfo *) nxmg())) {
4197  if (!strcmp(sl->GetName(), mgi->GetMerger()->GetName())) {
4198  newMerger = kFALSE;
4199  break;
4200  }
4201  }
4202  }
4203  if ((fMergersCount > fMergers->GetSize()) && newMerger) {
4204  // Still not enough mergers established
4205  if (!CreateMerger(sl, merging_port)) {
4206  // Cannot establish a merger
4207  AskForOutput(sl);
4208  fWorkersToMerge--;
4209  fMergersCount--;
4210  }
4211  } else
4212  RedirectWorker(s, sl, output_size);
4213  }
4214  }
4215  }
4216  } else {
4217  Error("HandleSubMerger","kOutputSize received not on endmaster!");
4218  }
4219  }
4220  break;
4221  }
4222 }
4223 
4224 ////////////////////////////////////////////////////////////////////////////////
4225 /// Redirect output of worker sl to some merger
4226 
4227 void TProof::RedirectWorker(TSocket *s, TSlave * sl, Int_t output_size)
4228 {
4229  Int_t merger_id = -1;
4230 
4231  if (fMergersByHost) {
4232  for (Int_t i = 0; i < fMergers->GetSize(); i++) {
4233  TMergerInfo *mgi = (TMergerInfo *)fMergers->At(i);
4234  if (!strcmp(sl->GetName(), mgi->GetMerger()->GetName())) {
4235  merger_id = i;
4236  break;
4237  }
4238  }
4239  } else {
4240  merger_id = FindNextFreeMerger();
4241  }
4242 
4243  if (merger_id == -1) {
4244  // No free merger (probably it had crashed before)
4245  AskForOutput(sl);
4246  } else {
4247  TMessage sendoutput(kPROOF_SUBMERGER);
4248  sendoutput << Int_t(kSendOutput);
4249  PDB(kSubmerger, 2)
4250  Info("RedirectWorker", "redirecting worker %s to merger %d", sl->GetOrdinal(), merger_id);
4251 
4252  PDB(kSubmerger, 2) Info("RedirectWorker", "redirecting output to merger #%d", merger_id);
4253  if (!fMergers || fMergers->GetSize() <= merger_id) {
4254  Error("RedirectWorker", "#%d not in list ", merger_id);
4255  return;
4256  }
4257  TMergerInfo * mi = (TMergerInfo *) fMergers->At(merger_id);
4258 
4259  TString hname = (IsLite()) ? "localhost" : mi->GetMerger()->GetName();
4260  sendoutput << merger_id;
4261  sendoutput << hname;
4262  sendoutput << mi->GetPort();
4263  s->Send(sendoutput);
4264  mi->AddMergedObjects(output_size);
4265  mi->AddWorker(sl);
4266  }
4267 }
4268 
4269 ////////////////////////////////////////////////////////////////////////////////
4270 /// Return a merger, which is both active and still accepts some workers to be
4271 /// assigned to it. It works on the 'round-robin' basis.
4272 
4273 Int_t TProof::FindNextFreeMerger()
4274 {
4275  while (fLastAssignedMerger < fMergers->GetSize() &&
4276  (!((TMergerInfo*)fMergers->At(fLastAssignedMerger))->IsActive() ||
4277  ((TMergerInfo*)fMergers->At(fLastAssignedMerger))->AreAllWorkersAssigned())) {
4278  fLastAssignedMerger++;
4279  }
4280 
4281  if (fLastAssignedMerger == fMergers->GetSize()) {
4282  fLastAssignedMerger = 0;
4283  } else {
4284  return fLastAssignedMerger++;
4285  }
4286 
4287  while (fLastAssignedMerger < fMergers->GetSize() &&
4288  (!((TMergerInfo*)fMergers->At(fLastAssignedMerger))->IsActive() ||
4289  ((TMergerInfo*)fMergers->At(fLastAssignedMerger))->AreAllWorkersAssigned())) {
4290  fLastAssignedMerger++;
4291  }
4292 
4293  if (fLastAssignedMerger == fMergers->GetSize()) {
4294  return -1;
4295  } else {
4296  return fLastAssignedMerger++;
4297  }
4298 }
4299 
4300 ////////////////////////////////////////////////////////////////////////////////
4301 /// Master asks for output from worker sl
4302 
4303 void TProof::AskForOutput(TSlave *sl)
4304 {
4305  TMessage sendoutput(kPROOF_SUBMERGER);
4306  sendoutput << Int_t(kSendOutput);
4307 
4308  PDB(kSubmerger, 2) Info("AskForOutput",
4309  "worker %s was asked to send its output to master",
4310  sl->GetOrdinal());
4311 
4312  sendoutput << -1;
4313  sendoutput << TString("master");
4314  sendoutput << -1;
4315  sl->GetSocket()->Send(sendoutput);
4316  if (IsLite()) fMergePrg.IncreaseNWrks();
4317 }
4318 
4319 ////////////////////////////////////////////////////////////////////////////////
4320 /// Final update of the progress dialog
4321 
4322 void TProof::UpdateDialog()
4323 {
4324  if (!fPlayer) return;
4325 
4326  // Handle abort ...
4327  if (fPlayer->GetExitStatus() == TVirtualProofPlayer::kAborted) {
4328  if (fSync)
4329  Info("UpdateDialog",
4330  "processing was aborted - %lld events processed",
4331  fPlayer->GetEventsProcessed());
4332 
4333  if (GetRemoteProtocol() > 11) {
4334  // New format
4335  Progress(-1, fPlayer->GetEventsProcessed(), -1, -1., -1., -1., -1.);
4336  } else {
4337  Progress(-1, fPlayer->GetEventsProcessed());
4338  }
4339  Emit("StopProcess(Bool_t)", kTRUE);
4340  }
4341 
4342  // Handle stop ...
4343  if (fPlayer->GetExitStatus() == TVirtualProofPlayer::kStopped) {
4344  if (fSync)
4345  Info("UpdateDialog",
4346  "processing was stopped - %lld events processed",
4347  fPlayer->GetEventsProcessed());
4348 
4349  if (GetRemoteProtocol() > 25) {
4350  // New format
4351  Progress(-1, fPlayer->GetEventsProcessed(), -1, -1., -1., -1., -1., -1, -1, -1.);
4352  } else if (GetRemoteProtocol() > 11) {
4353  Progress(-1, fPlayer->GetEventsProcessed(), -1, -1., -1., -1., -1.);
4354  } else {
4355  Progress(-1, fPlayer->GetEventsProcessed());
4356  }
4357  Emit("StopProcess(Bool_t)", kFALSE);
4358  }
4359 
4360  // Final update of the dialog box
4361  if (GetRemoteProtocol() > 25) {
4362  // New format
4363  EmitVA("Progress(Long64_t,Long64_t,Long64_t,Float_t,Float_t,Float_t,Float_t,Int_t,Int_t,Float_t)",
4364  10, (Long64_t)(-1), (Long64_t)(-1), (Long64_t)(-1),(Float_t)(-1.),(Float_t)(-1.),
4365  (Float_t)(-1.),(Float_t)(-1.),(Int_t)(-1),(Int_t)(-1),(Float_t)(-1.));
4366  } else if (GetRemoteProtocol() > 11) {
4367  // New format
4368  EmitVA("Progress(Long64_t,Long64_t,Long64_t,Float_t,Float_t,Float_t,Float_t)",
4369  7, (Long64_t)(-1), (Long64_t)(-1), (Long64_t)(-1),
4370  (Float_t)(-1.),(Float_t)(-1.),(Float_t)(-1.),(Float_t)(-1.));
4371  } else {
4372  EmitVA("Progress(Long64_t,Long64_t)", 2, (Long64_t)(-1), (Long64_t)(-1));
4373  }
4374 }
4375 
4376 ////////////////////////////////////////////////////////////////////////////////
4377 /// Activate the a-sync input handler.
4378 
4379 void TProof::ActivateAsyncInput()
4380 {
4381  TIter next(fSlaves);
4382  TSlave *sl;
4383 
4384  while ((sl = (TSlave*) next()))
4385  if (sl->GetInputHandler())
4386  sl->GetInputHandler()->Add();
4387 }
4388 
4389 ////////////////////////////////////////////////////////////////////////////////
4390 /// De-activate a-sync input handler.
4391 
4392 void TProof::DeActivateAsyncInput()
4393 {
4394  TIter next(fSlaves);
4395  TSlave *sl;
4396 
4397  while ((sl = (TSlave*) next()))
4398  if (sl->GetInputHandler())
4399  sl->GetInputHandler()->Remove();
4400 }
4401 
4402 ////////////////////////////////////////////////////////////////////////////////
4403 /// Get the active mergers count
4404 
4405 Int_t TProof::GetActiveMergersCount()
4406 {
4407  if (!fMergers) return 0;
4408 
4409  Int_t active_mergers = 0;
4410 
4411  TIter mergers(fMergers);
4412  TMergerInfo *mi = 0;
4413  while ((mi = (TMergerInfo *)mergers())) {
4414  if (mi->IsActive()) active_mergers++;
4415  }
4416 
4417  return active_mergers;
4418 }
4419 
4420 ////////////////////////////////////////////////////////////////////////////////
4421 /// Create a new merger
4422 
4423 Bool_t TProof::CreateMerger(TSlave *sl, Int_t port)
4424 {
4425  PDB(kSubmerger, 2)
4426  Info("CreateMerger", "worker %s will be merger ", sl->GetOrdinal());
4427 
4428  PDB(kSubmerger, 2) Info("CreateMerger","Begin");
4429 
4430  if (port <= 0) {
4431  PDB(kSubmerger,2)
4432  Info("CreateMerger", "cannot create merger on port %d - exit", port);
4433  return kFALSE;
4434  }
4435 
4436  Int_t workers = -1;
4437  if (!fMergersByHost) {
4438  Int_t mergersToCreate = fMergersCount - fMergers->GetSize();
4439  // Number of pure workers, which are not simply divisible by mergers
4440  Int_t rest = fWorkersToMerge % mergersToCreate;
4441  // We add one more worker for each of the first 'rest' mergers being established
4442  if (rest > 0 && fMergers->GetSize() < rest) {
4443  rest = 1;
4444  } else {
4445  rest = 0;
4446  }
4447  workers = (fWorkersToMerge / mergersToCreate) + rest;
4448  } else {
4449  Int_t workersOnHost = 0;
4450  for (Int_t i = 0; i < fActiveSlaves->GetSize(); i++) {
4451  if(!strcmp(sl->GetName(), fActiveSlaves->At(i)->GetName())) workersOnHost++;
4452  }
4453  workers = workersOnHost - 1;
4454  }
4455 
4456  TString msg;
4457  msg.Form("worker %s on host %s will be merger for %d additional workers", sl->GetOrdinal(), sl->GetName(), workers);
4458 
4459  if (gProofServ) {
4460  gProofServ->SendAsynMessage(msg);
4461  } else {
4462  Printf("%s",msg.Data());
4463  }
4464  TMergerInfo * merger = new TMergerInfo(sl, port, workers);
4465 
4466  TMessage bemerger(kPROOF_SUBMERGER);
4467  bemerger << Int_t(kBeMerger);
4468  bemerger << fMergers->GetSize();
4469  bemerger << workers;
4470  sl->GetSocket()->Send(bemerger);
4471 
4472  PDB(kSubmerger,2) Info("CreateMerger",
4473  "merger #%d (port: %d) for %d workers started",
4474  fMergers->GetSize(), port, workers);
4475 
4476  fMergers->Add(merger);
4477  fWorkersToMerge = fWorkersToMerge - workers;
4478 
4479  fRedirectNext = workers / 2;
4480 
4481  PDB(kSubmerger, 2) Info("CreateMerger", "exit");
4482  return kTRUE;
4483 }
4484 
4485 ////////////////////////////////////////////////////////////////////////////////
4486 /// Add a bad slave server to the bad slave list and remove it from
4487 /// the active list and from the two monitor objects. Assume that the work
4488 /// done by this worker was lost and ask packerizer to reassign it.
4489 
4490 void TProof::MarkBad(TSlave *wrk, const char *reason)
4491 {
4492  std::lock_guard<std::recursive_mutex> lock(fCloseMutex);
4493 
4494  // We may have been invalidated in the meanwhile: nothing to do in such a case
4495  if (!IsValid()) return;
4496 
4497  if (!wrk) {
4498  Error("MarkBad", "worker instance undefined: protocol error? ");
4499  return;
4500  }
4501 
4502  // Local URL
4503  static TString thisurl;
4504  if (thisurl.IsNull()) {
4505  if (IsMaster()) {
4506  Int_t port = gEnv->GetValue("ProofServ.XpdPort",-1);
4507  thisurl = TUrl(gSystem->HostName()).GetHostFQDN();
4508  if (port > 0) thisurl += TString::Format(":%d", port);
4509  } else {
4510  thisurl.Form("%s@%s:%d", fUrl.GetUser(), fUrl.GetHost(), fUrl.GetPort());
4511  }
4512  }
4513 
4514  if (!reason || (strcmp(reason, kPROOF_TerminateWorker) && strcmp(reason, kPROOF_WorkerIdleTO))) {
4515  // Message for notification
4516  const char *mastertype = (gProofServ && gProofServ->IsTopMaster()) ? "top master" : "master";
4517  TString src = IsMaster() ? Form("%s at %s", mastertype, thisurl.Data()) : "local session";
4518  TString msg;
4519  msg.Form("\n +++ Message from %s : marking %s:%d (%s) as bad\n +++ Reason: %s",
4520  src.Data(), wrk->GetName(), wrk->GetPort(), wrk->GetOrdinal(),
4521  (reason && strlen(reason)) ? reason : "unknown");
4522  Info("MarkBad", "%s", msg.Data());
4523  // Notify one level up, if the case
4524  // Add some hint for diagnostics
4525  if (gProofServ) {
4526  msg += TString::Format("\n\n +++ Most likely your code crashed on worker %s at %s:%d.\n",
4527  wrk->GetOrdinal(), wrk->GetName(), wrk->GetPort());
4528  } else {
4529  msg += TString::Format("\n\n +++ Most likely your code crashed\n");
4530  }
4531  msg += TString::Format(" +++ Please check the session logs for error messages either using\n");
4532  msg += TString::Format(" +++ the 'Show logs' button or executing\n");
4533  msg += TString::Format(" +++\n");
4534  if (gProofServ) {
4535  msg += TString::Format(" +++ root [] TProof::Mgr(\"%s\")->GetSessionLogs()->"
4536  "Display(\"%s\",0)\n\n", thisurl.Data(), wrk->GetOrdinal());
4537  gProofServ->SendAsynMessage(msg, kTRUE);
4538  } else {
4539  msg += TString::Format(" +++ root [] TProof::Mgr(\"%s\")->GetSessionLogs()->"
4540  "Display(\"*\")\n\n", thisurl.Data());
4541  Printf("%s", msg.Data());
4542  }
4543  } else if (reason) {
4544  if (gDebug > 0 && strcmp(reason, kPROOF_WorkerIdleTO)) {
4545  Info("MarkBad", "worker %s at %s:%d asked to terminate",
4546  wrk->GetOrdinal(), wrk->GetName(), wrk->GetPort());
4547  }
4548  }
4549 
4550  if (IsMaster() && reason) {
4551  if (strcmp(reason, kPROOF_TerminateWorker)) {
4552  // if the reason was not a planned termination
4553  TList *listOfMissingFiles = 0;
4554  if (!(listOfMissingFiles = (TList *)GetOutput("MissingFiles"))) {
4555  listOfMissingFiles = new TList();
4556  listOfMissingFiles->SetName("MissingFiles");
4557  if (fPlayer)
4558  fPlayer->AddOutputObject(listOfMissingFiles);
4559  }
4560  // If a query is being processed, assume that the work done by
4561  // the worker was lost and needs to be reassigned.
4562  TVirtualPacketizer *packetizer = fPlayer ? fPlayer->GetPacketizer() : 0;
4563  if (packetizer) {
4564  // the worker was lost so do resubmit the packets
4565  packetizer->MarkBad(wrk, 0, &listOfMissingFiles);
4566  }
4567  } else {
4568  // Tell the coordinator that we are gone
4569  if (gProofServ) {
4570  TString ord(wrk->GetOrdinal());
4571  Int_t id = ord.Last('.');
4572  if (id != kNPOS) ord.Remove(0, id+1);
4573  gProofServ->ReleaseWorker(ord.Data());
4574  }
4575  }
4576  } else if (TestBit(TProof::kIsClient) && reason && !strcmp(reason, kPROOF_WorkerIdleTO)) {
4577  // We are invalid after this
4578  fValid = kFALSE;
4579  }
4580 
4581  fActiveSlaves->Remove(wrk);
4582  FindUniqueSlaves();
4583 
4584  fAllMonitor->Remove(wrk->GetSocket());
4585  fActiveMonitor->Remove(wrk->GetSocket());
4586 
4587  fSendGroupView = kTRUE;
4588 
4589  if (IsMaster()) {
4590  if (reason && !strcmp(reason, kPROOF_TerminateWorker)) {
4591  // if the reason was a planned termination then delete the worker and
4592  // remove it from all the lists
4593  fSlaves->Remove(wrk);
4594  fBadSlaves->Remove(wrk);
4595  fActiveSlaves->Remove(wrk);
4596  fInactiveSlaves->Remove(wrk);
4597  fUniqueSlaves->Remove(wrk);
4598  fAllUniqueSlaves->Remove(wrk);
4599  fNonUniqueMasters->Remove(wrk);
4600 
4601  // we add it to the list of terminated slave infos instead, so that it
4602  // stays available in the .workers persistent file
4603  TSlaveInfo *si = new TSlaveInfo(
4604  wrk->GetOrdinal(),
4605  Form("%s@%s:%d", wrk->GetUser(), wrk->GetName(), wrk->GetPort()),
4606  0, "", wrk->GetWorkDir());
4607  if (!fTerminatedSlaveInfos->Contains(si)) fTerminatedSlaveInfos->Add(si);
4608  else delete si;
4609 
4610  delete wrk;
4611  } else {
4612  fBadSlaves->Add(wrk);
4613  fActiveSlaves->Remove(wrk);
4614  fUniqueSlaves->Remove(wrk);
4615  fAllUniqueSlaves->Remove(wrk);
4616  fNonUniqueMasters->Remove(wrk);
4617  if (fCurrentMonitor) fCurrentMonitor->DeActivate(wrk->GetSocket());
4618  wrk->Close();
4619  // Update the mergers count, if needed
4620  if (fMergersSet) {
4621  Int_t mergersCount = -1;
4622  TParameter<Int_t> *mc = dynamic_cast<TParameter<Int_t> *>(GetParameter("PROOF_UseMergers"));
4623  if (mc) mergersCount = mc->GetVal(); // Value set by user
4624  // Mergers count is set dynamically: recalculate it
4625  if (mergersCount == 0) {
4626  Int_t activeWorkers = fCurrentMonitor ? fCurrentMonitor->GetActive() : GetNumberOfActiveSlaves();
4627  if (activeWorkers > 1) {
4628  fMergersCount = TMath::Nint(TMath::Sqrt(activeWorkers));
4629  if (activeWorkers / fMergersCount < 2)
4630  fMergersCount = (Int_t) TMath::Sqrt(activeWorkers);
4631  }
4632  }
4633  }
4634  }
4635 
4636  // Update session workers files
4637  SaveWorkerInfo();
4638  } else {
4639  // On clients the proof session should be removed from the lists
4640  // and deleted, since it is not valid anymore
4641  fSlaves->Remove(wrk);
4642  if (fManager)
4643  fManager->DiscardSession(this);
4644  }
4645 }
4646 
4647 ////////////////////////////////////////////////////////////////////////////////
4648 /// Add slave with socket s to the bad slave list and remove if from
4649 /// the active list and from the two monitor objects.
4650 
4651 void TProof::MarkBad(TSocket *s, const char *reason)
4652 {
4653  std::lock_guard<std::recursive_mutex> lock(fCloseMutex);
4654 
4655  // We may have been invalidated in the meanwhile: nothing to do in such a case
4656  if (!IsValid()) return;
4657 
4658  TSlave *wrk = FindSlave(s);
4659  MarkBad(wrk, reason);
4660 }
4661 
4662 ////////////////////////////////////////////////////////////////////////////////
4663 /// Ask an active worker 'wrk' to terminate, i.e. to shutdown
4664 
4665 void TProof::TerminateWorker(TSlave *wrk)
4666 {
4667  if (!wrk) {
4668  Warning("TerminateWorker", "worker instance undefined: protocol error? ");
4669  return;
4670  }
4671 
4672  // Send stop message
4673  if (wrk->GetSocket() && wrk->GetSocket()->IsValid()) {
4674  TMessage mess(kPROOF_STOP);
4675  wrk->GetSocket()->Send(mess);
4676  } else {
4677  if (gDebug > 0)
4678  Info("TerminateWorker", "connection to worker is already down: cannot"
4679  " send termination message");
4680  }
4681 
4682  // This is a bad worker from now on
4683  MarkBad(wrk, kPROOF_TerminateWorker);
4684 }
4685 
4686 ////////////////////////////////////////////////////////////////////////////////
4687 /// Ask an active worker 'ord' to terminate, i.e. to shutdown
4688 
4689 void TProof::TerminateWorker(const char *ord)
4690 {
4691  if (ord && strlen(ord) > 0) {
4692  Bool_t all = (ord[0] == '*') ? kTRUE : kFALSE;
4693  if (IsMaster()) {
4694  TIter nxw(fSlaves);
4695  TSlave *wrk = 0;
4696  while ((wrk = (TSlave *)nxw())) {
4697  if (all || !strcmp(wrk->GetOrdinal(), ord)) {
4698  TerminateWorker(wrk);
4699  if (!all) break;
4700  }
4701  }
4702  } else {
4703  TMessage mess(kPROOF_STOP);
4704  mess << TString(ord);
4705  Broadcast(mess);
4706  }
4707  }
4708 }
4709 
4710 ////////////////////////////////////////////////////////////////////////////////
4711 /// Ping PROOF. Returns 1 if master server responded.
4712 
4713 Int_t TProof::Ping()
4714 {
4715  return Ping(kActive);
4716 }
4717 
4718 ////////////////////////////////////////////////////////////////////////////////
4719 /// Ping PROOF slaves. Returns the number of slaves that responded.
4720 
4721 Int_t TProof::Ping(ESlaves list)
4722 {
4723  TList *slaves = 0;
4724  if (list == kAll) slaves = fSlaves;
4725  if (list == kActive) slaves = fActiveSlaves;
4726  if (list == kUnique) slaves = fUniqueSlaves;
4727  if (list == kAllUnique) slaves = fAllUniqueSlaves;
4728 
4729  if (slaves->GetSize() == 0) return 0;
4730 
4731  int nsent = 0;
4732  TIter next(slaves);
4733 
4734  TSlave *sl;
4735  while ((sl = (TSlave *)next())) {
4736  if (sl->IsValid()) {
4737  if (sl->Ping() == -1) {
4738  MarkBad(sl, "ping unsuccessful");
4739  } else {
4740  nsent++;
4741  }
4742  }
4743  }
4744 
4745  return nsent;
4746 }
4747 
4748 ////////////////////////////////////////////////////////////////////////////////
4749 /// Ping PROOF slaves. Returns the number of slaves that responded.
4750 
4751 void TProof::Touch()
4752 {
4753  TList *slaves = fSlaves;
4754 
4755  if (slaves->GetSize() == 0) return;
4756 
4757  TIter next(slaves);
4758 
4759  TSlave *sl;
4760  while ((sl = (TSlave *)next())) {
4761  if (sl->IsValid()) {
4762  sl->Touch();
4763  }
4764  }
4765 
4766  return;
4767 }
4768 
4769 ////////////////////////////////////////////////////////////////////////////////
4770 /// Print status of PROOF cluster.
4771 
4772 void TProof::Print(Option_t *option) const
4773 {
4774  TString secCont;
4775 
4776  if (TestBit(TProof::kIsClient)) {
4777  Printf("Connected to: %s (%s)", GetMaster(),
4778  IsValid() ? "valid" : "invalid");
4779  Printf("Port number: %d", GetPort());
4780  Printf("User: %s", GetUser());
4781  Printf("ROOT version|rev: %s|%s", gROOT->GetVersion(), gROOT->GetGitCommit());
4782  Printf("Architecture-Compiler: %s-%s", gSystem->GetBuildArch(),
4783  gSystem->GetBuildCompilerVersion());
4784  TSlave *sl = (TSlave *)fActiveSlaves->First();
4785  if (sl) {
4786  TString sc;
4787  if (sl->GetSocket()->GetSecContext())
4788  Printf("Security context: %s",
4789  sl->GetSocket()->GetSecContext()->AsString(sc));
4790  Printf("Proofd protocol version: %d", sl->GetSocket()->GetRemoteProtocol());
4791  } else {
4792  Printf("Security context: Error - No connection");
4793  Printf("Proofd protocol version: Error - No connection");
4794  }
4795  Printf("Client protocol version: %d", GetClientProtocol());
4796  Printf("Remote protocol version: %d", GetRemoteProtocol());
4797  Printf("Log level: %d", GetLogLevel());
4798  Printf("Session unique tag: %s", IsValid() ? GetSessionTag() : "");
4799  Printf("Default data pool: %s", IsValid() ? GetDataPoolUrl() : "");
4800  if (IsValid())
4801  const_cast<TProof*>(this)->SendPrint(option);
4802  } else {
4803  const_cast<TProof*>(this)->AskStatistics();
4804  if (IsParallel())
4805  Printf("*** Master server %s (parallel mode, %d workers):",
4806  gProofServ->GetOrdinal(), GetParallel());
4807  else
4808  Printf("*** Master server %s (sequential mode):",
4809  gProofServ->GetOrdinal());
4810 
4811  Printf("Master host name: %s", gSystem->HostName());
4812  Printf("Port number: %d", GetPort());
4813  if (strlen(gProofServ->GetGroup()) > 0) {
4814  Printf("User/Group: %s/%s", GetUser(), gProofServ->GetGroup());
4815  } else {
4816  Printf("User: %s", GetUser());
4817  }
4818  TString ver;
4819  ver.Form("%s|%s", gROOT->GetVersion(), gROOT->GetGitCommit());
4820  if (gSystem->Getenv("ROOTVERSIONTAG"))
4821  ver.Form("%s|%s", gROOT->GetVersion(), gSystem->Getenv("ROOTVERSIONTAG"));
4822  Printf("ROOT version|rev|tag: %s", ver.Data());
4823  Printf("Architecture-Compiler: %s-%s", gSystem->GetBuildArch(),
4824  gSystem->GetBuildCompilerVersion());
4825  Printf("Protocol version: %d", GetClientProtocol());
4826  Printf("Image name: %s", GetImage());
4827  Printf("Working directory: %s", gSystem->WorkingDirectory());
4828  Printf("Config directory: %s", GetConfDir());
4829  Printf("Config file: %s", GetConfFile());
4830  Printf("Log level: %d", GetLogLevel());
4831  Printf("Number of workers: %d", GetNumberOfSlaves());
4832  Printf("Number of active workers: %d", GetNumberOfActiveSlaves());
4833  Printf("Number of unique workers: %d", GetNumberOfUniqueSlaves());
4834  Printf("Number of inactive workers: %d", GetNumberOfInactiveSlaves());
4835  Printf("Number of bad workers: %d", GetNumberOfBadSlaves());
4836  Printf("Total MB's processed: %.2f", float(GetBytesRead())/(1024*1024));
4837  Printf("Total real time used (s): %.3f", GetRealTime());
4838  Printf("Total CPU time used (s): %.3f", GetCpuTime());
4839  if (TString(option).Contains("a", TString::kIgnoreCase) && GetNumberOfSlaves()) {
4840  Printf("List of workers:");
4841  TList masters;
4842  TIter nextslave(fSlaves);
4843  while (TSlave* sl = dynamic_cast<TSlave*>(nextslave())) {
4844  if (!sl->IsValid()) continue;
4845 
4846  if (sl->GetSlaveType() == TSlave::kSlave) {
4847  sl->Print(option);
4848  } else if (sl->GetSlaveType() == TSlave::kMaster) {
4849  TMessage mess(kPROOF_PRINT);
4850  mess.WriteString(option);
4851  if (sl->GetSocket()->Send(mess) == -1)
4852  const_cast<TProof*>(this)->MarkBad(sl, "could not send kPROOF_PRINT request");
4853  else
4854  masters.Add(sl);
4855  } else {
4856  Error("Print", "TSlave is neither Master nor Worker");
4857  R__ASSERT(0);
4858  }
4859  }
4860  const_cast<TProof*>(this)->Collect(&masters, fCollectTimeout);
4861  }
4862  }
4863 }
4864 
4865 ////////////////////////////////////////////////////////////////////////////////
4866 /// Extract from opt information about output handling settings.
4867 /// The understood keywords are:
4868 /// of=<file>, outfile=<file> output file location
4869 /// ds=<dsname>, dataset=<dsname> dataset name ('of' and 'ds' are
4870 /// mutually exclusive,execution stops
4871 /// if both are found)
4872 /// sft[=<opt>], savetofile[=<opt>] control saving to file
4873 ///
4874 /// For 'mvf', the <opt> integer has the following meaning:
4875 /// <opt> = <how>*10 + <force>
4876 /// <force> = 0 save to file if memory threshold is reached
4877 /// (the memory threshold is set by the cluster
4878 /// admin); in case an output file is defined, the
4879 /// files are merged at the end;
4880 /// 1 save results to file.
4881 /// <how> = 0 save at the end of the query
4882 /// 1 save results after each packet (to reduce the
4883 /// loss in case of crash).
4884 ///
4885 /// Setting 'ds' automatically sets 'mvf=1'; it is still possible to set 'mvf=11'
4886 /// to save results after each packet.
4887 ///
4888 /// The separator from the next option is either a ' ' or a ';'
4889 ///
4890 /// All recognized settings are removed from the input string opt.
4891 /// If action == 0, set up the output file accordingly, if action == 1 clean related
4892 /// output file settings.
4893 /// If the final target file is local then 'target' is set to the final local path
4894 /// when action == 0 and used to retrieve the file with TFile::Cp when action == 1.
4895 ///
4896 /// Output file settings are in the form
4897 ///
4898 /// <previous_option>of=name <next_option>
4899 /// <previous_option>outfile=name,...;<next_option>
4900 ///
4901 /// The separator from the next option is either a ' ' or a ';'
4902 /// Called interanally by TProof::Process.
4903 ///
4904 /// Returns 0 on success, -1 on error.
4905 
4906 Int_t TProof::HandleOutputOptions(TString &opt, TString &target, Int_t action)
4907 {
4908  TString outfile, dsname, stfopt;
4909  if (action == 0) {
4910  TString tagf, tagd, tags, oo;
4911  Ssiz_t from = 0, iof = kNPOS, iod = kNPOS, ios = kNPOS;
4912  while (opt.Tokenize(oo, from, "[; ]")) {
4913  if (oo.BeginsWith("of=")) {
4914  tagf = "of=";
4915  iof = opt.Index(tagf);
4916  } else if (oo.BeginsWith("outfile=")) {
4917  tagf = "outfile=";
4918  iof = opt.Index(tagf);
4919  } else if (oo.BeginsWith("ds")) {
4920  tagd = "ds";
4921  iod = opt.Index(tagd);
4922  } else if (oo.BeginsWith("dataset")) {
4923  tagd = "dataset";
4924  iod = opt.Index(tagd);
4925  } else if (oo.BeginsWith("stf")) {
4926  tags = "stf";
4927  ios = opt.Index(tags);
4928  } else if (oo.BeginsWith("savetofile")) {
4929  tags = "savetofile";
4930  ios = opt.Index(tags);
4931  }
4932  }
4933  // Check consistency
4934  if (iof != kNPOS && iod != kNPOS) {
4935  Error("HandleOutputOptions", "options 'of'/'outfile' and 'ds'/'dataset' are incompatible!");
4936  return -1;
4937  }
4938 
4939  // Check output file first
4940  if (iof != kNPOS) {
4941  from = iof + tagf.Length();
4942  if (!opt.Tokenize(outfile, from, "[; ]") || outfile.IsNull()) {
4943  Error("HandleOutputOptions", "could not extract output file settings string! (%s)", opt.Data());
4944  return -1;
4945  }
4946  // For removal from original options string
4947  tagf += outfile;
4948  }
4949  // Check dataset
4950  if (iod != kNPOS) {
4951  from = iod + tagd.Length();
4952  if (!opt.Tokenize(dsname, from, "[; ]"))
4953  if (gDebug > 0) Info("HandleOutputOptions", "no dataset name found: use default");
4954  // For removal from original options string
4955  tagd += dsname;
4956  // The name may be empty or beginning with a '='
4957  if (dsname.BeginsWith("=")) dsname.Replace(0, 1, "");
4958  if (dsname.Contains("|V")) {
4959  target = "ds|V";
4960  dsname.ReplaceAll("|V", "");
4961  }
4962  if (dsname.IsNull()) dsname = "dataset_<qtag>";
4963  }
4964  // Check stf
4965  if (ios != kNPOS) {
4966  from = ios + tags.Length();
4967  if (!opt.Tokenize(stfopt, from, "[; ]"))
4968  if (gDebug > 0) Info("HandleOutputOptions", "save-to-file not found: use default");
4969  // For removal from original options string
4970  tags += stfopt;
4971  // It must be digit
4972  if (!stfopt.IsNull()) {
4973  if (stfopt.BeginsWith("=")) stfopt.Replace(0,1,"");
4974  if (!stfopt.IsNull()) {
4975  if (!stfopt.IsDigit()) {
4976  Error("HandleOutputOptions", "save-to-file option must be a digit! (%s)", stfopt.Data());
4977  return -1;
4978  }
4979  } else {
4980  // Default
4981  stfopt = "1";
4982  }
4983  } else {
4984  // Default
4985  stfopt = "1";
4986  }
4987  }
4988  // Remove from original options string
4989  opt.ReplaceAll(tagf, "");
4990  opt.ReplaceAll(tagd, "");
4991  opt.ReplaceAll(tags, "");
4992  }
4993 
4994  // Parse now
4995  if (action == 0) {
4996  // Output file
4997  if (!outfile.IsNull()) {
4998  if (!outfile.BeginsWith("master:")) {
4999  if (gSystem->AccessPathName(gSystem->DirName(outfile.Data()), kWritePermission)) {
5000  Warning("HandleOutputOptions",
5001  "directory '%s' for the output file does not exists or is not writable:"
5002  " saving to master", gSystem->DirName(outfile.Data()));
5003  outfile.Form("master:%s", gSystem->BaseName(outfile.Data()));
5004  } else {
5005  if (!IsLite()) {
5006  // The target file is local, so we need to retrieve it
5007  target = outfile;
5008  if (!stfopt.IsNull()) {
5009  outfile.Form("master:%s", gSystem->BaseName(target.Data()));
5010  } else {
5011  outfile = "";
5012  }
5013  }
5014  }
5015  }
5016  if (outfile.BeginsWith("master:")) {
5017  outfile.ReplaceAll("master:", "");
5018  if (outfile.IsNull() || !gSystem->IsAbsoluteFileName(outfile)) {
5019  // Get the master data dir
5020  TString ddir, emsg;
5021  if (!IsLite()) {
5022  if (Exec("gProofServ->GetDataDir()", "0", kTRUE) == 0) {
5023  TObjString *os = fMacroLog.GetLineWith("const char");
5024  if (os) {
5025  Ssiz_t fst = os->GetString().First('\"');
5026  Ssiz_t lst = os->GetString().Last('\"');
5027  ddir = os->GetString()(fst+1, lst-fst-1);
5028  } else {
5029  emsg = "could not find 'const char *' string in macro log! cannot continue";
5030  }
5031  } else {
5032  emsg = "could not retrieve master data directory info! cannot continue";
5033  }
5034  if (!emsg.IsNull()) {
5035  Error("HandleOutputOptions", "%s", emsg.Data());
5036  return -1;
5037  }
5038  }
5039  if (!ddir.IsNull()) ddir += "/";
5040  if (outfile.IsNull()) {
5041  outfile.Form("%s<file>", ddir.Data());
5042  } else {
5043  outfile.Insert(0, TString::Format("%s", ddir.Data()));
5044  }
5045  }
5046  }
5047  // Set the parameter
5048  if (!outfile.IsNull()) {
5049  if (!outfile.BeginsWith("of:")) outfile.Insert(0, "of:");
5050  SetParameter("PROOF_DefaultOutputOption", outfile.Data());
5051  }
5052  }
5053  // Dataset creation
5054  if (!dsname.IsNull()) {
5055  dsname.Insert(0, "ds:");
5056  // Set the parameter
5057  SetParameter("PROOF_DefaultOutputOption", dsname.Data());
5058  // Check the Save-To-File option
5059  if (!stfopt.IsNull()) {
5060  Int_t ostf = (Int_t) stfopt.Atoi();
5061  if (ostf%10 <= 0) {
5062  Warning("HandleOutputOptions", "Dataset required bu Save-To-File disabled: enabling!");
5063  stfopt.Form("%d", ostf+1);
5064  }
5065  } else {
5066  // Minimal setting
5067  stfopt = "1";
5068  }
5069  }
5070  // Save-To-File options
5071  if (!stfopt.IsNull()) {
5072  // Set the parameter
5073  SetParameter("PROOF_SavePartialResults", (Int_t) stfopt.Atoi());
5074  }
5075  } else {
5076  // Retrieve the file, if required
5077  if (GetOutputList()) {
5078  if (target == "ds|V") {
5079  // Find the dataset
5080  dsname = "";
5081  TIter nxo(GetOutputList());
5082  TObject *o = 0;
5083  while ((o = nxo())) {
5084  if (o->InheritsFrom(TFileCollection::Class())) {
5085  VerifyDataSet(o->GetName());
5086  dsname = o->GetName();
5087  break;
5088  }
5089  }
5090  if (!dsname.IsNull()) {
5091  TFileCollection *fc = GetDataSet(dsname);
5092  if (fc) {
5093  fc->Print();
5094  } else {
5095  Warning("HandleOutputOptions", "could not retrieve TFileCollection for dataset '%s'", dsname.Data());
5096  }
5097  } else {
5098  Warning("HandleOutputOptions", "dataset not found!");
5099  }
5100  } else {
5101  Bool_t targetcopied = kFALSE;
5102  TProofOutputFile *pf = 0;
5103  if (!target.IsNull())
5104  pf = (TProofOutputFile *) GetOutputList()->FindObject(gSystem->BaseName(target.Data()));
5105  if (pf) {
5106  // Copy the file
5107  if (strcmp(TUrl(pf->GetOutputFileName(), kTRUE).GetUrl(),
5108  TUrl(target, kTRUE).GetUrl())) {
5109  if (TFile::Cp(pf->GetOutputFileName(), target)) {
5110  Printf(" Output successfully copied to %s", target.Data());
5111  targetcopied = kTRUE;
5112  } else {
5113  Warning("HandleOutputOptions", "problems copying output to %s", target.Data());
5114  }
5115  }
5116  }
5117  TFile *fout = 0;
5118  TObject *o = 0;
5119  TIter nxo(GetOutputList());
5120  Bool_t swapcopied = kFALSE;
5121  while ((o = nxo())) {
5122  TProofOutputFile *pof = dynamic_cast<TProofOutputFile *>(o);
5123  if (pof) {
5124  if (pof->TestBit(TProofOutputFile::kSwapFile) && !target.IsNull()) {
5125  if (pof == pf && targetcopied) continue;
5126  // Copy the file
5127  if (strcmp(TUrl(pf->GetOutputFileName(), kTRUE).GetUrl(),
5128  TUrl(target, kTRUE).GetUrl())) {
5129  if (TFile::Cp(pof->GetOutputFileName(), target)) {
5130  Printf(" Output successfully copied to %s", target.Data());
5131  swapcopied = kTRUE;
5132  } else {
5133  Warning("HandleOutputOptions", "problems copying output to %s", target.Data());
5134  }
5135  }
5136  } else if (pof->IsRetrieve()) {
5137  // Retrieve this file to the local path indicated in the title
5138  if (strcmp(TUrl(pf->GetOutputFileName(), kTRUE).GetUrl(),
5139  TUrl(pof->GetTitle(), kTRUE).GetUrl())) {
5140  if (TFile::Cp(pof->GetOutputFileName(), pof->GetTitle())) {
5141  Printf(" Output successfully copied to %s", pof->GetTitle());
5142  } else {
5143  Warning("HandleOutputOptions",
5144  "problems copying %s to %s", pof->GetOutputFileName(), pof->GetTitle());
5145  }
5146  }
5147  }
5148  }
5149  }
5150  if (!target.IsNull() && !swapcopied) {
5151  if (!fout && !pf) {
5152  fout = TFile::Open(target, "RECREATE");
5153  if (!fout || (fout && fout->IsZombie())) {
5154  SafeDelete(fout);
5155  Warning("HandleOutputOptions", "problems opening output file %s", target.Data());
5156  }
5157  }
5158  if (fout) {
5159  nxo.Reset();
5160  while ((o = nxo())) {
5161  TProofOutputFile *pof = dynamic_cast<TProofOutputFile *>(o);
5162  if (!pof) {
5163  // Write the object to the open output file
5164  o->Write();
5165  }
5166  }
5167  }
5168  }
5169  // Clean-up
5170  if (fout) {
5171  fout->Close();
5172  SafeDelete(fout);
5173  Printf(" Output saved to %s", target.Data());
5174  }
5175  }
5176  }
5177  // Remove the parameter
5178  DeleteParameters("PROOF_DefaultOutputOption");
5179  // Remove the parameter
5180  DeleteParameters("PROOF_SavePartialResults");
5181  }
5182  // Done
5183  return 0;
5184 }
5185 
5186 ////////////////////////////////////////////////////////////////////////////////
5187 /// Extract from opt in optfb information about wanted feedback settings.
5188 /// Feedback are removed from the input string opt.
5189 /// If action == 0, set up feedback accordingly, if action == 1 clean related
5190 /// feedback settings (using info in optfb, if available, or reparsing opt).
5191 ///
5192 /// Feedback requirements are in the form
5193 ///
5194 /// <previous_option>fb=name1,name2,name3,... <next_option>
5195 /// <previous_option>feedback=name1,name2,name3,...;<next_option>
5196 ///
5197 /// The special name 'stats' triggers feedback about events and packets.
5198 /// The separator from the next option is either a ' ' or a ';'.
5199 /// Called interanally by TProof::Process.
5200 
5201 void TProof::SetFeedback(TString &opt, TString &optfb, Int_t action)
5202 {
5203  Ssiz_t from = 0;
5204  if (action == 0 || (action == 1 && optfb.IsNull())) {
5205  TString tag("fb=");
5206  Ssiz_t ifb = opt.Index(tag);
5207  if (ifb == kNPOS) {
5208  tag = "feedback=";
5209  ifb = opt.Index(tag);
5210  }
5211  if (ifb == kNPOS) return;
5212  from = ifb + tag.Length();
5213 
5214  if (!opt.Tokenize(optfb, from, "[; ]") || optfb.IsNull()) {
5215  Warning("SetFeedback", "could not extract feedback string! Ignoring ...");
5216  return;
5217  }
5218  // Remove from original options string
5219  tag += optfb;
5220  opt.ReplaceAll(tag, "");
5221  }
5222 
5223  // Parse now
5224  TString nm, startdraw, stopdraw;
5225  from = 0;
5226  while (optfb.Tokenize(nm, from, ",")) {
5227  // Special name first
5228  if (nm == "stats") {
5229  if (action == 0) {
5230  startdraw.Form("gDirectory->Add(new TStatsFeedback((TProof *)%p))", this);
5231  gROOT->ProcessLine(startdraw.Data());
5232  SetParameter("PROOF_StatsHist", "");
5233  AddFeedback("PROOF_EventsHist");
5234  AddFeedback("PROOF_PacketsHist");
5235  AddFeedback("PROOF_ProcPcktHist");
5236  } else {
5237  stopdraw.Form("TObject *o = gDirectory->FindObject(\"%s\"); "
5238  " if (o && strcmp(o->ClassName(), \"TStatsFeedback\")) "
5239  " { gDirectory->Remove(o); delete o; }", GetSessionTag());
5240  gROOT->ProcessLine(stopdraw.Data());
5241  DeleteParameters("PROOF_StatsHist");
5242  RemoveFeedback("PROOF_EventsHist");
5243  RemoveFeedback("PROOF_PacketsHist");
5244  RemoveFeedback("PROOF_ProcPcktHist");
5245  }
5246  } else {
5247  if (action == 0) {
5248  // Enable or
5249  AddFeedback(nm);
5250  startdraw.Form("gDirectory->Add(new TDrawFeedback((TProof *)%p))", this);
5251  gROOT->ProcessLine(startdraw.Data());
5252  } else {
5253  // ... or disable
5254  RemoveFeedback(nm);
5255  stopdraw.Form("TObject *o = gDirectory->FindObject(\"%s\"); "
5256  " if (o && strcmp(o->ClassName(), \"TDrawFeedback\")) "
5257  " { gDirectory->Remove(o); delete o; }", GetSessionTag());
5258  gROOT->ProcessLine(stopdraw.Data());
5259  }
5260  }
5261  }
5262 }
5263 
5264 ////////////////////////////////////////////////////////////////////////////////
5265 /// Process a data set (TDSet) using the specified selector (.C) file or
5266 /// Tselector object
5267 /// Entry- or event-lists should be set in the data set object using
5268 /// TDSet::SetEntryList.
5269 /// The return value is -1 in case of error and TSelector::GetStatus() in
5270 /// in case of success.
5271 
5272 Long64_t TProof::Process(TDSet *dset, const char *selector, Option_t *option,
5273  Long64_t nentries, Long64_t first)
5274 {
5275  if (!IsValid() || !fPlayer) return -1;
5276 
5277  // Set PROOF to running state
5278  SetRunStatus(TProof::kRunning);
5279 
5280  TString opt(option), optfb, outfile;
5281  // Enable feedback, if required
5282  if (opt.Contains("fb=") || opt.Contains("feedback=")) SetFeedback(opt, optfb, 0);
5283  // Define output file, either from 'opt' or the default one
5284  if (HandleOutputOptions(opt, outfile, 0) != 0) return -1;
5285 
5286  // Resolve query mode
5287  fSync = (GetQueryMode(opt) == kSync);
5288 
5289  if (fSync && (!IsIdle() || IsWaiting())) {
5290  // Already queued or processing queries: switch to asynchronous mode
5291  Info("Process", "session is in waiting or processing status: switch to asynchronous mode");
5292  fSync = kFALSE;
5293  opt.ReplaceAll("SYNC","");
5294  opt += "ASYN";
5295  }
5296 
5297  // Cleanup old temporary datasets
5298  if ((IsIdle() && !IsWaiting()) && fRunningDSets && fRunningDSets->GetSize() > 0) {
5299  fRunningDSets->SetOwner(kTRUE);
5300  fRunningDSets->Delete();
5301  }
5302 
5303  // deactivate the default application interrupt handler
5304  // ctrl-c's will be forwarded to PROOF to stop the processing
5305  TSignalHandler *sh = 0;
5306  if (fSync) {
5307  if (gApplication)
5308  sh = gSystem->RemoveSignalHandler(gApplication->GetSignalHandler());
5309  }
5310 
5311  // Make sure we get a fresh result
5312  fOutputList.Clear();
5313 
5314  // Make sure that the workers ready list is empty
5315  if (fWrksOutputReady) {
5316  fWrksOutputReady->SetOwner(kFALSE);
5317  fWrksOutputReady->Clear();
5318  }
5319 
5320  // Make sure the selector path is in the macro path
5321  TProof::AssertMacroPath(selector);
5322 
5323  // Reset time measurements
5324  fQuerySTW.Reset();
5325 
5326  Long64_t rv = -1;
5327  if (selector && strlen(selector)) {
5328  rv = fPlayer->Process(dset, selector, opt.Data(), nentries, first);
5329  } else if (fSelector) {
5330  rv = fPlayer->Process(dset, fSelector, opt.Data(), nentries, first);
5331  } else {
5332  Error("Process", "neither a selecrot file nor a selector object have"
5333  " been specified: cannot process!");
5334  }
5335 
5336  // This is the end of merging
5337  fQuerySTW.Stop();
5338  Float_t rt = fQuerySTW.RealTime();
5339  // Update the query content
5340  TQueryResult *qr = GetQueryResult();
5341  if (qr) {
5342  qr->SetTermTime(rt);
5343  qr->SetPrepTime(fPrepTime);
5344  }
5345 
5346  // Disable feedback, if required
5347  if (!optfb.IsNull()) SetFeedback(opt, optfb, 1);
5348  // Finalise output file settings (opt is ignored in here)
5349  if (HandleOutputOptions(opt, outfile, 1) != 0) return -1;
5350 
5351  // Retrieve status from the output list
5352  if (rv >= 0) {
5353  TParameter<Long64_t> *sst =
5354  (TParameter<Long64_t> *) fOutputList.FindObject("PROOF_SelectorStatus");
5355  if (sst) rv = sst->GetVal();
5356  }
5357 
5358  if (fSync) {
5359  // reactivate the default application interrupt handler
5360  if (sh)
5361  gSystem->AddSignalHandler(sh);
5362  // Save the performance info, if required
5363  if (!fPerfTree.IsNull()) {
5364  if (SavePerfTree() != 0) Error("Process", "saving performance info ...");
5365  // Must be re-enabled each time
5366  SetPerfTree(0);
5367  }
5368  }
5369 
5370  return rv;
5371 }
5372 
5373 ////////////////////////////////////////////////////////////////////////////////
5374 /// Process a data set (TFileCollection) using the specified selector (.C) file
5375 /// or TSelector object.
5376 /// The default tree is analyzed (i.e. the first one found). To specify another
5377 /// tree, the default tree can be changed using TFileCollection::SetDefaultMetaData .
5378 /// The return value is -1 in case of error and TSelector::GetStatus() in
5379 /// in case of success.
5380 
5381 Long64_t TProof::Process(TFileCollection *fc, const char *selector,
5382  Option_t *option, Long64_t nentries, Long64_t first)
5383 {
5384  if (!IsValid() || !fPlayer) return -1;
5385 
5386  if (fProtocol < 17) {
5387  Info("Process", "server version < 5.18/00:"
5388  " processing of TFileCollection not supported");
5389  return -1;
5390  }
5391 
5392  // We include the TFileCollection to the input list and we create a
5393  // fake TDSet with infor about it
5394  TDSet *dset = new TDSet(TString::Format("TFileCollection:%s", fc->GetName()), 0, 0, "");
5395  fPlayer->AddInput(fc);
5396 
5397 
5398  Long64_t retval = -1;
5399  if (selector && strlen(selector)) {
5400  retval = Process(dset, selector, option, nentries, first);
5401  } else if (fSelector) {
5402  retval = Process(dset, fSelector, option, nentries, first);
5403  } else {
5404  Error("Process", "neither a selecrot file nor a selector object have"
5405  " been specified: cannot process!");
5406  }
5407  fPlayer->GetInputList()->Remove(fc); // To avoid problems in future
5408 
5409  // Cleanup
5410  if (IsLite() && !fSync) {
5411  if (!fRunningDSets) fRunningDSets = new TList;
5412  fRunningDSets->Add(dset);
5413  } else {
5414  delete dset;
5415  }
5416 
5417  return retval;
5418 }
5419 
5420 ////////////////////////////////////////////////////////////////////////////////
5421 /// Process a dataset which is stored on the master with name 'dsetname'.
5422 /// The syntax for dsetname is name[#[dir/]objname], e.g.
5423 /// "mydset" analysis of the first tree in the top dir of the dataset
5424 /// named "mydset"
5425 /// "mydset#T" analysis tree "T" in the top dir of the dataset
5426 /// named "mydset"
5427 /// "mydset#adir/T" analysis tree "T" in the dir "adir" of the dataset
5428 /// named "mydset"
5429 /// "mydset#adir/" analysis of the first tree in the dir "adir" of the
5430 /// dataset named "mydset"
5431 /// The component 'name' in its more general form contains also the group and
5432 /// user name following "/<group>/<user>/<dsname>". Each of these components
5433 /// can contain one or more wildcards '*', in which case all the datasets matching
5434 /// the expression are added together as a global dataset (wildcard support has
5435 /// been added in version 5.27/02).
5436 /// The last argument 'elist' specifies an entry- or event-list to be used as
5437 /// event selection.
5438 /// It is also possible (starting w/ version 5.27/02) to run on multiple datasets
5439 /// at once in a more flexible way that the one provided by wildcarding. There
5440 /// are three possibilities:
5441 /// 1) specifying the dataset names separated by the OR operator '|', e.g.
5442 /// dsetname = "<dset1>|<dset2>|<dset3>|..."
5443 /// in this case the datasets are a seen as a global unique dataset
5444 /// 2) specifying the dataset names separated by a ',' or a ' ', e.g.
5445 /// dsetname = "<dset1>,<dset2> <dset3>,..."
5446 /// in this case the datasets are processed one after the other and the
5447 /// selector is notified when switching dataset via a bit in the current
5448 /// processed element.
5449 /// 3) giving the path of a textfile where the dataset names are specified
5450 /// on one or multiple lines; the lines found are joined as in 1), unless
5451 /// the filepath is followed by a ',' (i.e. p->Process("datasets.txt,",...)
5452 /// with the dataset names listed in 'datasets.txt') in which case they are
5453 /// treated as in 2); the file is open in raw mode with TFile::Open and
5454 /// therefore it cane be remote, e.g. on a Web server.
5455 /// Each <dsetj> has the format specified above for the single dataset processing,
5456 /// included wildcarding (the name of the tree and subdirectory must be same for
5457 /// all the datasets).
5458 /// In the case of multiple datasets, 'elist' is treated a global entry list.
5459 /// It is possible to specify per-dataset entry lists using the syntax
5460 /// "mydset[#adir/[T]]?enl=entrylist"
5461 /// or
5462 /// "mydset[#adir/[T]]<<entrylist"
5463 /// Here 'entrylist' is a tag identifying, in the order :
5464 /// i. a named entry-list in the input list or in the input data list
5465 /// ii. a named entry-list in memory (in gDirectory)
5466 /// iii. the path of a file containing the entry-list to be used
5467 /// In the case ii) and iii) the entry-list object(s) is(are) added to the input
5468 /// data list.
5469 /// The return value is -1 in case of error and TSelector::GetStatus() in
5470 /// in case of success.
5471 
5472 Long64_t TProof::Process(const char *dsetname, const char *selector,
5473  Option_t *option, Long64_t nentries,
5474  Long64_t first, TObject *elist)
5475 {
5476  if (fProtocol < 13) {
5477  Info("Process", "processing 'by name' not supported by the server");
5478  return -1;
5479  }
5480 
5481  TString dsname, fname(dsetname);
5482  // If the 'dsetname' corresponds to an existing and readable file we will try to
5483  // interpretate its content as names of datasets to be processed. One line can contain
5484  // more datasets, separated by ',' or '|'. By default the dataset lines will be added
5485  // (i.e. joined as in option '|'); if the file name ends with ',' the dataset lines are
5486  // joined with ','.
5487  const char *separator = (fname.EndsWith(",")) ? "," : "|";
5488  if (!strcmp(separator, ",") || fname.EndsWith("|")) fname.Remove(fname.Length()-1, 1);
5489  if (!(gSystem->AccessPathName(fname, kReadPermission))) {
5490  TUrl uf(fname, kTRUE);
5491  uf.SetOptions(TString::Format("%sfiletype=raw", uf.GetOptions()));
5492  TFile *f = TFile::Open(uf.GetUrl());
5493  if (f && !(f->IsZombie())) {
5494  const Int_t blen = 8192;
5495  char buf[blen];
5496  Long64_t rest = f->GetSize();
5497  while (rest > 0) {
5498  Long64_t len = (rest > blen - 1) ? blen - 1 : rest;
5499  if (f->ReadBuffer(buf, len)) {
5500  Error("Process", "problems reading from file '%s'", fname.Data());
5501  dsname = "";
5502  break;
5503  }
5504  buf[len] = '\0';
5505  dsname += buf;
5506  rest -= len;
5507  }
5508  f->Close();
5509  SafeDelete(f);
5510  // We fail if a failure occured
5511  if (rest > 0) return -1;
5512  } else {
5513  Error("Process", "could not open file '%s'", fname.Data());
5514  return -1;
5515  }
5516  }
5517  if (dsname.IsNull()) {
5518  dsname = dsetname;
5519  } else {
5520  // Remove trailing '\n'
5521  if (dsname.EndsWith("\n")) dsname.Remove(dsname.Length()-1, 1);
5522  // Replace all '\n' with the proper separator
5523  dsname.ReplaceAll("\n", separator);
5524  if (gDebug > 0) {
5525  Info("Process", "processing multi-dataset read from file '%s':", fname.Data());
5526  Info("Process", " '%s'", dsname.Data());
5527  }
5528  }
5529 
5530  TString names(dsname), name, enl, newname;
5531  // If multi-dataset check if server supports it
5532  if (fProtocol < 28 && names.Index(TRegexp("[, |]")) != kNPOS) {
5533  Info("Process", "multi-dataset processing not supported by the server");
5534  return -1;
5535  }
5536 
5537  TEntryList *el = 0;
5538  TString dsobj, dsdir;
5539  Int_t from = 0;
5540  while (names.Tokenize(name, from, "[, |]")) {
5541 
5542  newname = name;
5543  // Extract the specific entry-list, if any
5544  enl = "";
5545  Int_t ienl = name.Index("?enl=");
5546  if (ienl == kNPOS) {
5547  ienl = name.Index("<<");
5548  if (ienl != kNPOS) {
5549  newname.Remove(ienl);
5550  ienl += strlen("<<");
5551  }
5552  } else {
5553  newname.Remove(ienl);
5554  ienl += strlen("?enl=");
5555  }
5556 
5557  // Check the name syntax first
5558  TString obj, dir("/");
5559  Int_t idxc = newname.Index("#");
5560  if (idxc != kNPOS) {
5561  Int_t idxs = newname.Index("/", 1, idxc, TString::kExact);
5562  if (idxs != kNPOS) {
5563  obj = newname(idxs+1, newname.Length());
5564  dir = newname(idxc+1, newname.Length());
5565  dir.Remove(dir.Index("/") + 1);
5566  newname.Remove(idxc);
5567  } else {
5568  obj = newname(idxc+1, newname.Length());
5569  newname.Remove(idxc);
5570  }
5571  } else if (newname.Index(":") != kNPOS && newname.Index("://") == kNPOS) {
5572  // protection against using ':' instead of '#'
5573  Error("Process", "bad name syntax (%s): please use"
5574  " a '#' after the dataset name", name.Data());
5575  dsname.ReplaceAll(name, "");
5576  continue;
5577  }
5578  if (dsobj.IsNull() && dsdir.IsNull()) {
5579  // The first one specifies obj and dir
5580  dsobj = obj;
5581  dsdir = dir;
5582  } else if (obj != dsobj || dir != dsdir) {
5583  // Inconsistent specification: not supported
5584  Warning("Process", "'obj' or 'dir' specification not consistent w/ the first given: ignore");
5585  }
5586  // Process the entry-list name, if any
5587  if (ienl != kNPOS) {
5588  // Get entrylist name or path
5589  enl = name(ienl, name.Length());
5590  el = 0;
5591  TObject *oel = 0;
5592  // If not in the input list ...
5593  TList *inpl = GetInputList();
5594  if (inpl && (oel = inpl->FindObject(enl))) el = dynamic_cast<TEntryList *>(oel);
5595  // ... check the heap
5596  if (!el && gDirectory && (oel = gDirectory->FindObject(enl))) {
5597  if ((el = dynamic_cast<TEntryList *>(oel))) {
5598  // Add to the input list (input data not available on master where
5599  // this info will be processed)
5600  if (fProtocol >= 28)
5601  if (!(inpl->FindObject(el->GetName()))) AddInput(el);
5602  }
5603  }
5604  // If not in the heap, check a file, if any
5605  if (!el) {
5606  if (!gSystem->AccessPathName(enl)) {
5607  TFile *f = TFile::Open(enl);
5608  if (f && !(f->IsZombie()) && f->GetListOfKeys()) {
5609  TIter nxk(f->GetListOfKeys());
5610  TKey *k = 0;
5611  while ((k = (TKey *) nxk())) {
5612  if (!strcmp(k->GetClassName(), "TEntryList")) {
5613  if (!el) {
5614  if ((el = dynamic_cast<TEntryList *>(f->Get(k->GetName())))) {
5615  // Add to the input list (input data not available on master where
5616  // this info will be processed)
5617  if (fProtocol >= 28) {
5618  if (!(inpl->FindObject(el->GetName()))) {
5619  el = (TEntryList *) el->Clone();
5620  AddInput(el);
5621  }
5622  } else {
5623  el = (TEntryList *) el->Clone();
5624  }
5625  }
5626  } else if (strcmp(el->GetName(), k->GetName())) {
5627  Warning("Process", "multiple entry lists found in file '%s': the first one is taken;\n"
5628  "if this is not what you want, load first the content in memory"
5629  "and select it by name ", enl.Data());
5630  }
5631  }
5632  }
5633  } else {
5634  Warning("Process","file '%s' cannot be open or is empty - ignoring", enl.Data());
5635  }
5636  }
5637  }
5638  // Transmit the information
5639  if (fProtocol >= 28) {
5640  newname += "?enl=";
5641  if (el) {
5642  // An entry list object is avalaible in the input list: add its name
5643  newname += el->GetName();
5644  } else {
5645  // The entry list object was not found: send the name, the future entry list manager will
5646  // find it on the server side
5647  newname += enl;
5648  }
5649  }
5650  }
5651  // Adjust the name for this dataset
5652  dsname.ReplaceAll(name, newname);
5653  }
5654 
5655  // Create the dataset object
5656  TDSet *dset = new TDSet(dsname, dsobj, dsdir);
5657  // Set entry list
5658  if (el && fProtocol < 28) {
5659  dset->SetEntryList(el);
5660  } else {
5661  dset->SetEntryList(elist);
5662  }
5663  // Run
5664  Long64_t retval = -1;
5665  if (selector && strlen(selector)) {
5666  retval = Process(dset, selector, option, nentries, first);
5667  } else if (fSelector) {
5668  retval = Process(dset, fSelector, option, nentries, first);
5669  } else {
5670  Error("Process", "neither a selector file nor a selector object have"
5671  " been specified: cannot process!");
5672  }
5673  // Cleanup
5674  if (IsLite() && !fSync) {
5675  if (!fRunningDSets) fRunningDSets = new TList;
5676  fRunningDSets->Add(dset);
5677  } else {
5678  delete dset;
5679  }
5680 
5681  return retval;
5682 }
5683 
5684 ////////////////////////////////////////////////////////////////////////////////
5685 /// Generic (non-data based) selector processing: the Process() method of the
5686 /// specified selector (.C) or TSelector object is called 'n' times.
5687 /// The return value is -1 in case of error and TSelector::GetStatus() in
5688 /// in case of success.
5689 
5690 Long64_t TProof::Process(const char *selector, Long64_t n, Option_t *option)
5691 {
5692  if (!IsValid()) return -1;
5693 
5694  if (fProtocol < 16) {
5695  Info("Process", "server version < 5.17/04: generic processing not supported");
5696  return -1;
5697  }
5698 
5699  // Fake data set
5700  TDSet *dset = new TDSet;
5701  dset->SetBit(TDSet::kEmpty);
5702 
5703  Long64_t retval = -1;
5704  if (selector && strlen(selector)) {
5705  retval = Process(dset, selector, option, n);
5706  } else if (fSelector) {
5707  retval = Process(dset, fSelector, option, n);
5708  } else {
5709  Error("Process", "neither a selector file nor a selector object have"
5710  " been specified: cannot process!");
5711  }
5712 
5713  // Cleanup
5714  if (IsLite() && !fSync) {
5715  if (!fRunningDSets) fRunningDSets = new TList;
5716  fRunningDSets->Add(dset);
5717  } else {
5718  delete dset;
5719  }
5720  return retval;
5721 }
5722 
5723 ////////////////////////////////////////////////////////////////////////////////
5724 /// Process a data set (TDSet) using the specified selector object.
5725 /// Entry- or event-lists should be set in the data set object using
5726 /// TDSet::SetEntryList.
5727 /// The return value is -1 in case of error and TSelector::GetStatus() in
5728 /// in case of success.
5729 
5730 Long64_t TProof::Process(TDSet *dset, TSelector *selector, Option_t *option,
5731  Long64_t nentries, Long64_t first)
5732 {
5733  if (fProtocol < 34) {
5734  Error("Process", "server version < 5.33/02:"
5735  "processing by object not supported");
5736  return -1;
5737  }
5738  if (!selector) {
5739  Error("Process", "selector object undefined!");
5740  return -1;
5741  }
5742  fSelector = selector;
5743  Long64_t rc = Process(dset, (const char*)0, option, nentries, first);
5744  fSelector = 0;
5745  // Done
5746  return rc;
5747 }
5748 
5749 ////////////////////////////////////////////////////////////////////////////////
5750 /// Process a data set (TFileCollection) using the specified selector object
5751 /// The default tree is analyzed (i.e. the first one found). To specify another
5752 /// tree, the default tree can be changed using TFileCollection::SetDefaultMetaData .
5753 /// The return value is -1 in case of error and TSelector::GetStatus() in
5754 /// in case of success.
5755 
5756 Long64_t TProof::Process(TFileCollection *fc, TSelector *selector,
5757  Option_t *option, Long64_t nentries, Long64_t first)
5758 {
5759  if (fProtocol < 34) {
5760  Error("Process", "server version < 5.33/02:"
5761  "processing by object not supported");
5762  return -1;
5763  }
5764  if (!selector) {
5765  Error("Process", "selector object undefined!");
5766  return -1;
5767  }
5768  fSelector = selector;
5769  Long64_t rc = Process(fc, (const char*)0, option, nentries, first);
5770  fSelector = 0;
5771  // Done
5772  return rc;
5773 }
5774 
5775 ////////////////////////////////////////////////////////////////////////////////
5776 /// Process with name of dataset and TSelector object
5777 
5778 Long64_t TProof::Process(const char *dsetname, TSelector *selector,
5779  Option_t *option, Long64_t nentries,
5780  Long64_t first, TObject *elist)
5781 {
5782  if (fProtocol < 34) {
5783  Error("Process", "server version < 5.33/02:"
5784  "processing by object not supported");
5785  return -1;
5786  }
5787  if (!selector) {
5788  Error("Process", "selector object undefined!");
5789  return -1;
5790  }
5791  fSelector = selector;
5792  Long64_t rc = Process(dsetname, (const char*)0, option, nentries, first, elist);
5793  fSelector = 0;
5794  // Done
5795  return rc;
5796 }
5797 
5798 ////////////////////////////////////////////////////////////////////////////////
5799 /// Generic (non-data based) selector processing: the Process() method of the
5800 /// specified selector is called 'n' times.
5801 /// The return value is -1 in case of error and TSelector::GetStatus() in
5802 /// in case of success.
5803 
5804 Long64_t TProof::Process(TSelector *selector, Long64_t n, Option_t *option)
5805 {
5806  if (fProtocol < 34) {
5807  Error("Process", "server version < 5.33/02:"
5808  "processing by object not supported");
5809  return -1;
5810  }
5811  if (!selector) {
5812  Error("Process", "selector object undefined!");
5813  return -1;
5814  }
5815  fSelector = selector;
5816  Long64_t rc = Process((const char*)0, n, option);
5817  fSelector = 0;
5818  // Done
5819  return rc;
5820 }
5821 
5822 ////////////////////////////////////////////////////////////////////////////////
5823 /// Get reference for the qry-th query in fQueries (as
5824 /// displayed by ShowQueries).
5825 
5826 Int_t TProof::GetQueryReference(Int_t qry, TString &ref)
5827 {
5828  ref = "";
5829  if (qry > 0) {
5830  if (!fQueries)
5831  GetListOfQueries();
5832  if (fQueries) {
5833  TIter nxq(fQueries);
5834  TQueryResult *qr = 0;
5835  while ((qr = (TQueryResult *) nxq()))
5836  if (qr->GetSeqNum() == qry) {
5837  ref.Form("%s:%s", qr->GetTitle(), qr->GetName());
5838  return 0;
5839  }
5840  }
5841  }
5842  return -1;
5843 }
5844 
5845 ////////////////////////////////////////////////////////////////////////////////
5846 /// Finalize the qry-th query in fQueries.
5847 /// If force, force retrieval if the query is found in the local list
5848 /// but has already been finalized (default kFALSE).
5849 /// If query < 0, finalize current query.
5850 /// Return 0 on success, -1 on error
5851 
5852 Long64_t TProof::Finalize(Int_t qry, Bool_t force)
5853 {
5854  if (fPlayer) {
5855  if (qry > 0) {
5856  TString ref;
5857  if (GetQueryReference(qry, ref) == 0) {
5858  return Finalize(ref, force);
5859  } else {
5860  Info("Finalize", "query #%d not found", qry);
5861  }
5862  } else {
5863  // The last query
5864  return Finalize("", force);
5865  }
5866  }
5867  return -1;
5868 }
5869 
5870 ////////////////////////////////////////////////////////////////////////////////
5871 /// Finalize query with reference ref.
5872 /// If force, force retrieval if the query is found in the local list
5873 /// but has already been finalized (default kFALSE).
5874 /// If ref = 0, finalize current query.
5875 /// Return 0 on success, -1 on error
5876 
5877 Long64_t TProof::Finalize(const char *ref, Bool_t force)
5878 {
5879  if (fPlayer) {
5880  // Get the pointer to the query
5881  TQueryResult *qr = (ref && strlen(ref) > 0) ? fPlayer->GetQueryResult(ref)
5882  : GetQueryResult();
5883  Bool_t retrieve = kFALSE;
5884  TString xref(ref);
5885  if (!qr) {
5886  if (!xref.IsNull()) {
5887  retrieve = kTRUE;
5888  }
5889  } else {
5890  if (qr->IsFinalized()) {
5891  if (force) {
5892  retrieve = kTRUE;
5893  } else {
5894  Info("Finalize","query already finalized:"
5895  " use Finalize(<qry>,kTRUE) to force new retrieval");
5896  qr = 0;
5897  }
5898  } else {
5899  retrieve = kTRUE;
5900  xref.Form("%s:%s", qr->GetTitle(), qr->GetName());
5901  }
5902  }
5903  if (retrieve) {
5904  Retrieve(xref.Data());
5905  qr = fPlayer->GetQueryResult(xref.Data());
5906  }
5907  if (qr)
5908  return fPlayer->Finalize(qr);
5909  }
5910  return -1;
5911 }
5912 
5913 ////////////////////////////////////////////////////////////////////////////////
5914 /// Send retrieve request for the qry-th query in fQueries.
5915 /// If path is defined save it to path.
5916 
5917 Int_t TProof::Retrieve(Int_t qry, const char *path)
5918 {
5919  if (qry > 0) {
5920  TString ref;
5921  if (GetQueryReference(qry, ref) == 0)
5922  return Retrieve(ref, path);
5923  else
5924  Info("Retrieve", "query #%d not found", qry);
5925  } else {
5926  Info("Retrieve","positive argument required - do nothing");
5927  }
5928  return -1;
5929 }
5930 
5931 ////////////////////////////////////////////////////////////////////////////////
5932 /// Send retrieve request for the query specified by ref.
5933 /// If path is defined save it to path.
5934 /// Generic method working for all queries known by the server.
5935 
5936 Int_t TProof::Retrieve(const char *ref, const char *path)
5937 {
5938  if (ref) {
5939  TMessage m(kPROOF_RETRIEVE);
5940  m << TString(ref);
5941  Broadcast(m, kActive);
5942  Collect(kActive, fCollectTimeout);
5943 
5944  // Archive it locally, if required
5945  if (path) {
5946 
5947  // Get pointer to query
5948  TQueryResult *qr = fPlayer ? fPlayer->GetQueryResult(ref) : 0;
5949 
5950  if (qr) {
5951 
5952  TFile *farc = TFile::Open(path,"UPDATE");
5953  if (!farc || (farc && !(farc->IsOpen()))) {
5954  Info("Retrieve", "archive file cannot be open (%s)", path);
5955  return 0;
5956  }
5957  farc->cd();
5958 
5959  // Update query status
5960  qr->SetArchived(path);
5961 
5962  // Write to file
5963  qr->Write();
5964 
5965  farc->Close();
5966  SafeDelete(farc);
5967 
5968  } else {
5969  Info("Retrieve", "query not found after retrieve");
5970  return -1;
5971  }
5972  }
5973 
5974  return 0;
5975  }
5976  return -1;
5977 }
5978 
5979 ////////////////////////////////////////////////////////////////////////////////
5980 /// Send remove request for the qry-th query in fQueries.
5981 
5982 Int_t TProof::Remove(Int_t qry, Bool_t all)
5983 {
5984  if (qry > 0) {
5985  TString ref;
5986  if (GetQueryReference(qry, ref) == 0)
5987  return Remove(ref, all);
5988  else
5989  Info("Remove", "query #%d not found", qry);
5990  } else {
5991  Info("Remove","positive argument required - do nothing");
5992  }
5993  return -1;
5994 }
5995 
5996 ////////////////////////////////////////////////////////////////////////////////
5997 /// Send remove request for the query specified by ref.
5998 /// If all = TRUE remove also local copies of the query, if any.
5999 /// Generic method working for all queries known by the server.
6000 /// This method can be also used to reset the list of queries
6001 /// waiting to be processed: for that purpose use ref == "cleanupqueue".
6002 
6003 Int_t TProof::Remove(const char *ref, Bool_t all)
6004 {
6005  if (all) {
6006  // Remove also local copies, if any
6007  if (fPlayer)
6008  fPlayer->RemoveQueryResult(ref);
6009  }
6010 
6011  if (IsLite()) return 0;
6012 
6013  if (ref) {
6014  TMessage m(kPROOF_REMOVE);
6015  m << TString(ref);
6016  Broadcast(m, kActive);
6017  Collect(kActive, fCollectTimeout);
6018  return 0;
6019  }
6020  return -1;
6021 }
6022 
6023 ////////////////////////////////////////////////////////////////////////////////
6024 /// Send archive request for the qry-th query in fQueries.
6025 
6026 Int_t TProof::Archive(Int_t qry, const char *path)
6027 {
6028  if (qry > 0) {
6029  TString ref;
6030  if (GetQueryReference(qry, ref) == 0)
6031  return Archive(ref, path);
6032  else
6033  Info("Archive", "query #%d not found", qry);
6034  } else {
6035  Info("Archive","positive argument required - do nothing");
6036  }
6037  return -1;
6038 }
6039 
6040 ////////////////////////////////////////////////////////////////////////////////
6041 /// Send archive request for the query specified by ref.
6042 /// Generic method working for all queries known by the server.
6043 /// If ref == "Default", path is understood as a default path for
6044 /// archiving.
6045 
6046 Int_t TProof::Archive(const char *ref, const char *path)
6047 {
6048  if (ref) {
6049  TMessage m(kPROOF_ARCHIVE);
6050  m << TString(ref) << TString(path);
6051  Broadcast(m, kActive);
6052  Collect(kActive, fCollectTimeout);
6053  return 0;
6054  }
6055  return -1;
6056 }
6057 
6058 ////////////////////////////////////////////////////////////////////////////////
6059 /// Send cleanup request for the session specified by tag.
6060 
6061 Int_t TProof::CleanupSession(const char *sessiontag)
6062 {
6063  if (sessiontag) {
6064  TMessage m(kPROOF_CLEANUPSESSION);
6065  m << TString(sessiontag);
6066  Broadcast(m, kActive);
6067  Collect(kActive, fCollectTimeout);
6068  return 0;
6069  }
6070  return -1;
6071 }
6072 
6073 ////////////////////////////////////////////////////////////////////////////////
6074 /// Change query running mode to the one specified by 'mode'.
6075 
6076 void TProof::SetQueryMode(EQueryMode mode)
6077 {
6078  fQueryMode = mode;
6079 
6080  if (gDebug > 0)
6081  Info("SetQueryMode","query mode is set to: %s", fQueryMode == kSync ?
6082  "Sync" : "Async");
6083 }
6084 
6085 ////////////////////////////////////////////////////////////////////////////////
6086 /// Find out the query mode based on the current setting and 'mode'.
6087 
6088 TProof::EQueryMode TProof::GetQueryMode(Option_t *mode) const
6089 {
6090  EQueryMode qmode = fQueryMode;
6091 
6092  if (mode && (strlen(mode) > 0)) {
6093  TString m(mode);
6094  m.ToUpper();
6095  if (m.Contains("ASYN")) {
6096  qmode = kAsync;
6097  } else if (m.Contains("SYNC")) {
6098  qmode = kSync;
6099  }
6100  }
6101 
6102  if (gDebug > 0)
6103  Info("GetQueryMode","query mode is set to: %s", qmode == kSync ?
6104  "Sync" : "Async");
6105 
6106  return qmode;
6107 }
6108 
6109 ////////////////////////////////////////////////////////////////////////////////
6110 /// Execute the specified drawing action on a data set (TDSet).
6111 /// Event- or Entry-lists should be set in the data set object using
6112 /// TDSet::SetEntryList.
6113 /// Returns -1 in case of error or number of selected events otherwise.
6114 
6115 Long64_t TProof::DrawSelect(TDSet *dset, const char *varexp,
6116  const char *selection, Option_t *option,
6117  Long64_t nentries, Long64_t first)
6118 {
6119  if (!IsValid() || !fPlayer) return -1;
6120 
6121  // Make sure that asynchronous processing is not active
6122  if (!IsIdle()) {
6123  Info("DrawSelect","not idle, asynchronous Draw not supported");
6124  return -1;
6125  }
6126  TString opt(option);
6127  Int_t idx = opt.Index("ASYN", 0, TString::kIgnoreCase);
6128  if (idx != kNPOS)
6129  opt.Replace(idx,4,"");
6130 
6131  return fPlayer->DrawSelect(dset, varexp, selection, opt, nentries, first);
6132 }
6133 
6134 ////////////////////////////////////////////////////////////////////////////////
6135 /// Execute the specified drawing action on a data set which is stored on the
6136 /// master with name 'dsetname'.
6137 /// The syntax for dsetname is name[#[dir/]objname], e.g.
6138 /// "mydset" analysis of the first tree in the top dir of the dataset
6139 /// named "mydset"
6140 /// "mydset#T" analysis tree "T" in the top dir of the dataset
6141 /// named "mydset"
6142 /// "mydset#adir/T" analysis tree "T" in the dir "adir" of the dataset
6143 /// named "mydset"
6144 /// "mydset#adir/" analysis of the first tree in the dir "adir" of the
6145 /// dataset named "mydset"
6146 /// The last argument 'enl' specifies an entry- or event-list to be used as
6147 /// event selection.
6148 /// The return value is -1 in case of error and TSelector::GetStatus() in
6149 /// in case of success.
6150 
6151 Long64_t TProof::DrawSelect(const char *dsetname, const char *varexp,
6152  const char *selection, Option_t *option,
6153  Long64_t nentries, Long64_t first, TObject *enl)
6154 {
6155  if (fProtocol < 13) {
6156  Info("Process", "processing 'by name' not supported by the server");
6157  return -1;
6158  }
6159 
6160  TString name(dsetname);
6161  TString obj;
6162  TString dir = "/";
6163  Int_t idxc = name.Index("#");
6164  if (idxc != kNPOS) {
6165  Int_t idxs = name.Index("/", 1, idxc, TString::kExact);
6166  if (idxs != kNPOS) {
6167  obj = name(idxs+1, name.Length());
6168  dir = name(idxc+1, name.Length());
6169  dir.Remove(dir.Index("/") + 1);
6170  name.Remove(idxc);
6171  } else {
6172  obj = name(idxc+1, name.Length());
6173  name.Remove(idxc);
6174  }
6175  } else if (name.Index(":") != kNPOS && name.Index("://") == kNPOS) {
6176  // protection against using ':' instead of '#'
6177  Error("DrawSelect", "bad name syntax (%s): please use"
6178  " a '#' after the dataset name", dsetname);
6179  return -1;
6180  }
6181 
6182  TDSet *dset = new TDSet(name, obj, dir);
6183  // Set entry-list, if required
6184  dset->SetEntryList(enl);
6185  Long64_t retval = DrawSelect(dset, varexp, selection, option, nentries, first);
6186  delete dset;
6187  return retval;
6188 }
6189 
6190 ////////////////////////////////////////////////////////////////////////////////
6191 /// Send STOPPROCESS message to master and workers.
6192 
6193 void TProof::StopProcess(Bool_t abort, Int_t timeout)
6194 {
6195  PDB(kGlobal,2)
6196  Info("StopProcess","enter %d", abort);
6197 
6198  if (!IsValid())
6199  return;
6200 
6201  // Flag that we have been stopped
6202  ERunStatus rst = abort ? TProof::kAborted : TProof::kStopped;
6203  SetRunStatus(rst);
6204 
6205  if (fPlayer)
6206  fPlayer->StopProcess(abort, timeout);
6207 
6208  // Stop any blocking 'Collect' request; on masters we do this only if
6209  // aborting; when stopping, we still need to receive the results
6210  if (TestBit(TProof::kIsClient) || abort)
6211  InterruptCurrentMonitor();
6212 
6213  if (fSlaves->GetSize() == 0)
6214  return;
6215 
6216  // Notify the remote counterpart
6217  TSlave *sl;
6218  TIter next(fSlaves);
6219  while ((sl = (TSlave *)next()))
6220  if (sl->IsValid())
6221  // Ask slave to progate the stop/abort request
6222  sl->StopProcess(abort, timeout);
6223 }
6224 
6225 ////////////////////////////////////////////////////////////////////////////////
6226 /// Signal to disable related switches
6227 
6228 void TProof::DisableGoAsyn()
6229 {
6230  Emit("DisableGoAsyn()");
6231 }
6232 
6233 ////////////////////////////////////////////////////////////////////////////////
6234 /// Send GOASYNC message to the master.
6235 
6236 void TProof::GoAsynchronous()
6237 {
6238  if (!IsValid()) return;
6239 
6240  if (GetRemoteProtocol() < 22) {
6241  Info("GoAsynchronous", "functionality not supported by the server - ignoring");
6242  return;
6243  }
6244 
6245  if (fSync && !IsIdle()) {
6246  TMessage m(kPROOF_GOASYNC);
6247  Broadcast(m);
6248  } else {
6249  Info("GoAsynchronous", "either idle or already in asynchronous mode - ignoring");
6250  }
6251 }
6252 
6253 ////////////////////////////////////////////////////////////////////////////////
6254 /// Receive the log file of the slave with socket s.
6255 
6256 void TProof::RecvLogFile(TSocket *s, Int_t size)
6257 {
6258  const Int_t kMAXBUF = 16384; //32768 //16384 //65536;
6259  char buf[kMAXBUF];
6260 
6261  // If macro saving is enabled prepare macro
6262  if (fSaveLogToMacro && fMacroLog.GetListOfLines()) {
6263  fMacroLog.GetListOfLines()->SetOwner(kTRUE);
6264  fMacroLog.GetListOfLines()->Clear();
6265  }
6266 
6267  // Append messages to active logging unit
6268  Int_t fdout = -1;
6269  if (!fLogToWindowOnly) {
6270  fdout = (fRedirLog) ? fileno(fLogFileW) : fileno(stdout);
6271  if (fdout < 0) {
6272  Warning("RecvLogFile", "file descriptor for outputs undefined (%d):"
6273  " will not log msgs", fdout);
6274  return;
6275  }
6276  lseek(fdout, (off_t) 0, SEEK_END);
6277  }
6278 
6279  Int_t left, rec, r;
6280  Long_t filesize = 0;
6281 
6282  while (filesize < size) {
6283  left = Int_t(size - filesize);
6284  if (left >= kMAXBUF)
6285  left = kMAXBUF-1;
6286  rec = s->RecvRaw(&buf, left);
6287  filesize = (rec > 0) ? (filesize + rec) : filesize;
6288  if (!fLogToWindowOnly && !fSaveLogToMacro) {
6289  if (rec > 0) {
6290 
6291  char *p = buf;
6292  r = rec;
6293  while (r) {
6294  Int_t w;
6295 
6296  w = write(fdout, p, r);
6297 
6298  if (w < 0) {
6299  SysError("RecvLogFile", "error writing to unit: %d", fdout);
6300  break;
6301  }
6302  r -= w;
6303  p += w;
6304  }
6305  } else if (rec < 0) {
6306  Error("RecvLogFile", "error during receiving log file");
6307  break;
6308  }
6309  }
6310  if (rec > 0) {
6311  buf[rec] = 0;
6312  EmitVA("LogMessage(const char*,Bool_t)", 2, buf, kFALSE);
6313  // If macro saving is enabled add to TMacro
6314  if (fSaveLogToMacro) fMacroLog.AddLine(buf);
6315  }
6316  }
6317 
6318  // If idle restore logs to main session window
6319  if (fRedirLog && IsIdle() && !TestBit(TProof::kIsMaster))
6320  fRedirLog = kFALSE;
6321 }
6322 
6323 ////////////////////////////////////////////////////////////////////////////////
6324 /// Notify locally 'msg' to the appropriate units (file, stdout, window)
6325 /// If defined, 'sfx' is added after 'msg' (typically a line-feed);
6326 
6327 void TProof::NotifyLogMsg(const char *msg, const char *sfx)
6328 {
6329  // Must have somenthing to notify
6330  Int_t len = 0;
6331  if (!msg || (len = strlen(msg)) <= 0)
6332  return;
6333 
6334  // Get suffix length if any
6335  Int_t lsfx = (sfx) ? strlen(sfx) : 0;
6336 
6337  // Append messages to active logging unit
6338  Int_t fdout = -1;
6339  if (!fLogToWindowOnly) {
6340  fdout = (fRedirLog) ? fileno(fLogFileW) : fileno(stdout);
6341  if (fdout < 0) {
6342  Warning("NotifyLogMsg", "file descriptor for outputs undefined (%d):"
6343  " will not notify msgs", fdout);
6344  return;
6345  }
6346  lseek(fdout, (off_t) 0, SEEK_END);
6347  }
6348 
6349  if (!fLogToWindowOnly) {
6350  // Write to output unit (stdout or a log file)
6351  if (len > 0) {
6352  char *p = (char *)msg;
6353  Int_t r = len;
6354  while (r) {
6355  Int_t w = write(fdout, p, r);
6356  if (w < 0) {
6357  SysError("NotifyLogMsg", "error writing to unit: %d", fdout);
6358  break;
6359  }
6360  r -= w;
6361  p += w;
6362  }
6363  // Add a suffix, if requested
6364  if (lsfx > 0)
6365  if (write(fdout, sfx, lsfx) != lsfx)
6366  SysError("NotifyLogMsg", "error writing to unit: %d", fdout);
6367  }
6368  }
6369  if (len > 0) {
6370  // Publish the message to the separate window (if the latter is missing
6371  // the message will just get lost)
6372  EmitVA("LogMessage(const char*,Bool_t)", 2, msg, kFALSE);
6373  }
6374 
6375  // If idle restore logs to main session window
6376  if (fRedirLog && IsIdle())
6377  fRedirLog = kFALSE;
6378 }
6379 
6380 ////////////////////////////////////////////////////////////////////////////////
6381 /// Log a message into the appropriate window by emitting a signal.
6382 
6383 void TProof::LogMessage(const char *msg, Bool_t all)
6384 {
6385  PDB(kGlobal,1)
6386  Info("LogMessage","Enter ... %s, 'all: %s", msg ? msg : "",
6387  all ? "true" : "false");
6388 
6389  if (gROOT->IsBatch()) {
6390  PDB(kGlobal,1) Info("LogMessage","GUI not started - use TProof::ShowLog()");
6391  return;
6392  }
6393 
6394  if (msg)
6395  EmitVA("LogMessage(const char*,Bool_t)", 2, msg, all);
6396 
6397  // Re-position at the beginning of the file, if requested.
6398  // This is used by the dialog when it re-opens the log window to
6399  // provide all the session messages
6400  if (all)
6401  lseek(fileno(fLogFileR), (off_t) 0, SEEK_SET);
6402 
6403  const Int_t kMAXBUF = 32768;
6404  char buf[kMAXBUF];
6405  Int_t len;
6406  do {
6407  while ((len = read(fileno(fLogFileR), buf, kMAXBUF-1)) < 0 &&
6408  TSystem::GetErrno() == EINTR)
6409  TSystem::ResetErrno();
6410 
6411  if (len < 0) {
6412  Error("LogMessage", "error reading log file");
6413  break;
6414  }
6415 
6416  if (len > 0) {
6417  buf[len] = 0;
6418  EmitVA("LogMessage(const char*,Bool_t)", 2, buf, kFALSE);
6419  }
6420 
6421  } while (len > 0);
6422 }
6423 
6424 ////////////////////////////////////////////////////////////////////////////////
6425 /// Send to all active slaves servers the current slave group size
6426 /// and their unique id. Returns number of active slaves.
6427 /// Returns -1 in case of error.
6428 
6429 Int_t TProof::SendGroupView()
6430 {
6431  if (!IsValid()) return -1;
6432  if (TestBit(TProof::kIsClient)) return 0;
6433  if (!fSendGroupView) return 0;
6434  fSendGroupView = kFALSE;
6435 
6436  TIter next(fActiveSlaves);
6437  TSlave *sl;
6438 
6439  int bad = 0, cnt = 0, size = GetNumberOfActiveSlaves();
6440  char str[32];
6441 
6442  while ((sl = (TSlave *)next())) {
6443  snprintf(str, 32, "%d %d", cnt, size);
6444  if (sl->GetSocket()->Send(str, kPROOF_GROUPVIEW) == -1) {
6445  MarkBad(sl, "could not send kPROOF_GROUPVIEW message");
6446  bad++;
6447  } else
6448  cnt++;
6449  }
6450 
6451  // Send the group view again in case there was a change in the
6452  // group size due to a bad slave
6453 
6454  if (bad) SendGroupView();
6455 
6456  return GetNumberOfActiveSlaves();
6457 }
6458 
6459 ////////////////////////////////////////////////////////////////////////////////
6460 /// Static method to extract the filename (if any) form a CINT command.
6461 /// Returns kTRUE and the filename in 'fn'; returns kFALSE if not found or not
6462 /// appliable.
6463 
6464 Bool_t TProof::GetFileInCmd(const char *cmd, TString &fn)
6465 {
6466  TString s = cmd;
6467  s = s.Strip(TString::kBoth);
6468 
6469  if (s.Length() > 0 &&
6470  (s.BeginsWith(".L") || s.BeginsWith(".x") || s.BeginsWith(".X"))) {
6471  TString file = s(2, s.Length());
6472  TString acm, arg, io;
6473  fn = gSystem->SplitAclicMode(file, acm, arg, io);
6474  if (!fn.IsNull())
6475  return kTRUE;
6476  }
6477 
6478  // Not found
6479  return kFALSE;
6480 }
6481 
6482 ////////////////////////////////////////////////////////////////////////////////
6483 /// Send command to be executed on the PROOF master and/or slaves.
6484 /// If plusMaster is kTRUE then exeucte on slaves and master too.
6485 /// Command can be any legal command line command. Commands like
6486 /// ".x file.C" or ".L file.C" will cause the file file.C to be send
6487 /// to the PROOF cluster. Returns -1 in case of error, >=0 in case of
6488 /// succes.
6489 
6490 Int_t TProof::Exec(const char *cmd, Bool_t plusMaster)
6491 {
6492  return Exec(cmd, kActive, plusMaster);
6493 }
6494 
6495 ////////////////////////////////////////////////////////////////////////////////
6496 /// Send command to be executed on the PROOF master and/or slaves.
6497 /// Command can be any legal command line command. Commands like
6498 /// ".x file.C" or ".L file.C" will cause the file file.C to be send
6499 /// to the PROOF cluster. Returns -1 in case of error, >=0 in case of
6500 /// succes.
6501 
6502 Int_t TProof::Exec(const char *cmd, ESlaves list, Bool_t plusMaster)
6503 {
6504  if (!IsValid()) return -1;
6505 
6506  TString s = cmd;
6507  s = s.Strip(TString::kBoth);
6508 
6509  if (!s.Length()) return 0;
6510 
6511  // check for macro file and make sure the file is available on all slaves
6512  TString filename;
6513  if (TProof::GetFileInCmd(s.Data(), filename)) {
6514  char *fn = gSystem->Which(TROOT::GetMacroPath(), filename, kReadPermission);
6515  if (fn) {
6516  if (GetNumberOfUniqueSlaves() > 0) {
6517  if (SendFile(fn, kAscii | kForward | kCpBin) < 0) {
6518  Error("Exec", "file %s could not be transfered", fn);
6519  delete [] fn;
6520  return -1;
6521  }
6522  } else {
6523  TString scmd = s(0,3) + fn;
6524  Int_t n = SendCommand(scmd, list);
6525  delete [] fn;
6526  return n;
6527  }
6528  } else {
6529  Error("Exec", "macro %s not found", filename.Data());
6530  return -1;
6531  }
6532  delete [] fn;
6533  }
6534 
6535  if (plusMaster) {
6536  if (IsLite()) {
6537  gROOT->ProcessLine(cmd);
6538  } else {
6539  DeactivateWorker("*");
6540  Int_t res = SendCommand(cmd, list);
6541  ActivateWorker("restore");
6542  if (res < 0)
6543  return res;
6544  }
6545  }
6546  return SendCommand(cmd, list);
6547 }
6548 
6549 ////////////////////////////////////////////////////////////////////////////////
6550 /// Send command to be executed on node of ordinal 'ord' (use "0" for master).
6551 /// Command can be any legal command line command. Commands like
6552 /// ".x file.C" or ".L file.C" will cause the file file.C to be send
6553 /// to the PROOF cluster.
6554 /// If logtomacro is TRUE the text result of the action is saved in the fMacroLog
6555 /// TMacro, accessible via TMacro::GetMacroLog();
6556 /// Returns -1 in case of error, >=0 in case of succes.
6557 
6558 Int_t TProof::Exec(const char *cmd, const char *ord, Bool_t logtomacro)
6559 {
6560  if (!IsValid()) return -1;
6561 
6562  TString s = cmd;
6563  s = s.Strip(TString::kBoth);
6564 
6565  if (!s.Length()) return 0;
6566 
6567  Int_t res = 0;
6568  if (IsLite()) {
6569  gROOT->ProcessLine(cmd);
6570  } else {
6571  Bool_t oldRedirLog = fRedirLog;
6572  fRedirLog = kTRUE;
6573  // Deactivate all workers
6574  DeactivateWorker("*");
6575  fRedirLog = kFALSE;
6576  // Reactivate the target ones, if needed
6577  if (strcmp(ord, "master") && strcmp(ord, "0")) ActivateWorker(ord);
6578  // Honour log-to-macro-saving settings
6579  Bool_t oldSaveLog = fSaveLogToMacro;
6580  fSaveLogToMacro = logtomacro;
6581  res = SendCommand(cmd, kActive);
6582  fSaveLogToMacro = oldSaveLog;
6583  fRedirLog = kTRUE;
6584  ActivateWorker("restore");
6585  fRedirLog = oldRedirLog;
6586  }
6587  // Done
6588  return res;
6589 }
6590 
6591 ////////////////////////////////////////////////////////////////////////////////
6592 /// Send command to be executed on the PROOF master and/or slaves.
6593 /// Command can be any legal command line command, however commands
6594 /// like ".x file.C" or ".L file.C" will not cause the file.C to be
6595 /// transfered to the PROOF cluster. In that case use TProof::Exec().
6596 /// Returns the status send by the remote server as part of the
6597 /// kPROOF_LOGDONE message. Typically this is the return code of the
6598 /// command on the remote side. Returns -1 in case of error.
6599 
6600 Int_t TProof::SendCommand(const char *cmd, ESlaves list)
6601 {
6602  if (!IsValid()) return -1;
6603 
6604  Broadcast(cmd, kMESS_CINT, list);
6605  Collect(list);
6606 
6607  return fStatus;
6608 }
6609 
6610 ////////////////////////////////////////////////////////////////////////////////
6611 /// Get value of environment variable 'env' on node 'ord'
6612 
6613 TString TProof::Getenv(const char *env, const char *ord)
6614 {
6615  // The command to be executed
6616  TString cmd = TString::Format("gSystem->Getenv(\"%s\")", env);
6617  if (Exec(cmd.Data(), ord, kTRUE) != 0) return TString("");
6618  // Get the line
6619  TObjString *os = fMacroLog.GetLineWith("const char");
6620  if (os) {
6621  TString info;
6622  Ssiz_t from = 0;
6623  os->GetString().Tokenize(info, from, "\"");
6624  os->GetString().Tokenize(info, from, "\"");
6625  if (gDebug > 0) Printf("%s: '%s'", env, info.Data());
6626  return info;
6627  }
6628  return TString("");
6629 }
6630 
6631 ////////////////////////////////////////////////////////////////////////////////
6632 /// Get into 'env' the value of integer RC env variable 'rcenv' on node 'ord'
6633 
6634 Int_t TProof::GetRC(const char *rcenv, Int_t &env, const char *ord)
6635 {
6636  // The command to be executed
6637  TString cmd = TString::Format("if (gEnv->Lookup(\"%s\")) { gEnv->GetValue(\"%s\",\"\"); }", rcenv, rcenv);
6638  // Exectute the command saving the logs to macro
6639  if (Exec(cmd.Data(), ord, kTRUE) != 0) return -1;
6640  // Get the line
6641  TObjString *os = fMacroLog.GetLineWith("const char");
6642  Int_t rc = -1;
6643  if (os) {
6644  Ssiz_t fst = os->GetString().First('\"');
6645  Ssiz_t lst = os->GetString().Last('\"');
6646  TString info = os->GetString()(fst+1, lst-fst-1);
6647  if (info.IsDigit()) {
6648  env = info.Atoi();
6649  rc = 0;
6650  if (gDebug > 0)
6651  Printf("%s: %d", rcenv, env);
6652  }
6653  }
6654  return rc;
6655 }
6656 
6657 ////////////////////////////////////////////////////////////////////////////////
6658 /// Get into 'env' the value of double RC env variable 'rcenv' on node 'ord'
6659 
6660 Int_t TProof::GetRC(const char *rcenv, Double_t &env, const char *ord)
6661 {
6662  // The command to be executed
6663  TString cmd = TString::Format("if (gEnv->Lookup(\"%s\")) { gEnv->GetValue(\"%s\",\"\"); }", rcenv, rcenv);
6664  // Exectute the command saving the logs to macro
6665  if (Exec(cmd.Data(), ord, kTRUE) != 0) return -1;
6666  // Get the line
6667  TObjString *os = fMacroLog.GetLineWith("const char");
6668  Int_t rc = -1;
6669  if (os) {
6670  Ssiz_t fst = os->GetString().First('\"');
6671  Ssiz_t lst = os->GetString().Last('\"');
6672  TString info = os->GetString()(fst+1, lst-fst-1);
6673  if (info.IsFloat()) {
6674  env = info.Atof();
6675  rc = 0;
6676  if (gDebug > 0)
6677  Printf("%s: %f", rcenv, env);
6678  }
6679  }
6680  return rc;
6681 }
6682 
6683 ////////////////////////////////////////////////////////////////////////////////
6684 /// Get into 'env' the value of string RC env variable 'rcenv' on node 'ord'
6685 
6686 Int_t TProof::GetRC(const char *rcenv, TString &env, const char *ord)
6687 {
6688  // The command to be executed
6689  TString cmd = TString::Format("if (gEnv->Lookup(\"%s\")) { gEnv->GetValue(\"%s\",\"\"); }", rcenv, rcenv);
6690  // Exectute the command saving the logs to macro
6691  if (Exec(cmd.Data(), ord, kTRUE) != 0) return -1;
6692  // Get the line
6693  TObjString *os = fMacroLog.GetLineWith("const char");
6694  Int_t rc = -1;
6695  if (os) {
6696  Ssiz_t fst = os->GetString().First('\"');
6697  Ssiz_t lst = os->GetString().Last('\"');
6698  env = os->GetString()(fst+1, lst-fst-1);
6699  rc = 0;
6700  if (gDebug > 0)
6701  Printf("%s: %s", rcenv, env.Data());
6702  }
6703  return rc;
6704 }
6705 
6706 ////////////////////////////////////////////////////////////////////////////////
6707 /// Transfer the current state of the master to the active slave servers.
6708 /// The current state includes: the current working directory, etc.
6709 /// Returns the number of active slaves. Returns -1 in case of error.
6710 
6711 Int_t TProof::SendCurrentState(TList *list)
6712 {
6713  if (!IsValid()) return -1;
6714 
6715  // Go to the new directory, reset the interpreter environment and
6716  // tell slave to delete all objects from its new current directory.
6717  Broadcast(gDirectory->GetPath(), kPROOF_RESET, list);
6718 
6719  return GetParallel();
6720 }
6721 
6722 ////////////////////////////////////////////////////////////////////////////////
6723 /// Transfer the current state of the master to the active slave servers.
6724 /// The current state includes: the current working directory, etc.
6725 /// Returns the number of active slaves. Returns -1 in case of error.
6726 
6727 Int_t TProof::SendCurrentState(ESlaves list)
6728 {
6729  if (!IsValid()) return -1;
6730 
6731  // Go to the new directory, reset the interpreter environment and
6732  // tell slave to delete all objects from its new current directory.
6733  Broadcast(gDirectory->GetPath(), kPROOF_RESET, list);
6734 
6735  return GetParallel();
6736 }
6737 
6738 ////////////////////////////////////////////////////////////////////////////////
6739 /// Transfer the initial (i.e. current) state of the master to all
6740 /// slave servers. Currently the initial state includes: log level.
6741 /// Returns the number of active slaves. Returns -1 in case of error.
6742 
6743 Int_t TProof::SendInitialState()
6744 {
6745  if (!IsValid()) return -1;
6746 
6747  SetLogLevel(fLogLevel, gProofDebugMask);
6748 
6749  return GetNumberOfActiveSlaves();
6750 }
6751 
6752 ////////////////////////////////////////////////////////////////////////////////
6753 /// Check if a file needs to be send to the slave. Use the following
6754 /// algorithm:
6755 /// - check if file appears in file map
6756 /// - if yes, get file's modtime and check against time in map,
6757 /// if modtime not same get md5 and compare against md5 in map,
6758 /// if not same return kTRUE.
6759 /// - if no, get file's md5 and modtime and store in file map, ask
6760 /// slave if file exists with specific md5, if yes return kFALSE,
6761 /// if no return kTRUE.
6762 /// The options 'cpopt' define if to copy things from cache to sandbox and what.
6763 /// To retrieve from the cache the binaries associated with the file TProof::kCpBin
6764 /// must be set in cpopt; the default is copy everything.
6765 /// Returns kTRUE in case file needs to be send, returns kFALSE in case
6766 /// file is already on remote node.
6767 
6768 Bool_t TProof::CheckFile(const char *file, TSlave *slave, Long_t modtime, Int_t cpopt)
6769 {
6770  Bool_t sendto = kFALSE;
6771 
6772  // create worker based filename
6773  TString sn = slave->GetName();
6774  sn += ":";
6775  sn += slave->GetOrdinal();
6776  sn += ":";
6777  sn += gSystem->BaseName(file);
6778 
6779  // check if file is in map
6780  FileMap_t::const_iterator it;
6781  if ((it = fFileMap.find(sn)) != fFileMap.end()) {
6782  // file in map
6783  MD5Mod_t md = (*it).second;
6784  if (md.fModtime != modtime) {
6785  TMD5 *md5 = TMD5::FileChecksum(file);
6786  if (md5) {
6787  if ((*md5) != md.fMD5) {
6788  sendto = kTRUE;
6789  md.fMD5 = *md5;
6790  md.fModtime = modtime;
6791  fFileMap[sn] = md;
6792  // When on the master, the master and/or slaves may share
6793  // their file systems and cache. Therefore always make a
6794  // check for the file. If the file already exists with the
6795  // expected md5 the kPROOF_CHECKFILE command will cause the
6796  // file to be copied from cache to slave sandbox.
6797  if (TestBit(TProof::kIsMaster)) {
6798  sendto = kFALSE;
6799  TMessage mess(kPROOF_CHECKFILE);
6800  mess << TString(gSystem->BaseName(file)) << md.fMD5 << cpopt;
6801  slave->GetSocket()->Send(mess);
6802 
6803  fCheckFileStatus = 0;
6804  Collect(slave, fCollectTimeout, kPROOF_CHECKFILE);
6805  sendto = (fCheckFileStatus == 0) ? kTRUE : kFALSE;
6806  }
6807  }
6808  delete md5;
6809  } else {
6810  Error("CheckFile", "could not calculate local MD5 check sum - dont send");
6811  return kFALSE;
6812  }
6813  }
6814  } else {
6815  // file not in map
6816  TMD5 *md5 = TMD5::FileChecksum(file);
6817  MD5Mod_t md;
6818  if (md5) {
6819  md.fMD5 = *md5;
6820  md.fModtime = modtime;
6821  fFileMap[sn] = md;
6822  delete md5;
6823  } else {
6824  Error("CheckFile", "could not calculate local MD5 check sum - dont send");
6825  return kFALSE;
6826  }
6827  TMessage mess(kPROOF_CHECKFILE);
6828  mess << TString(gSystem->BaseName(file)) << md.fMD5 << cpopt;
6829  slave->GetSocket()->Send(mess);
6830 
6831  fCheckFileStatus = 0;
6832  Collect(slave, fCollectTimeout, kPROOF_CHECKFILE);
6833  sendto = (fCheckFileStatus == 0) ? kTRUE : kFALSE;
6834  }
6835 
6836  return sendto;
6837 }
6838 
6839 ////////////////////////////////////////////////////////////////////////////////
6840 /// Send a file to master or slave servers. Returns number of slaves
6841 /// the file was sent to, maybe 0 in case master and slaves have the same
6842 /// file system image, -1 in case of error.
6843 /// If defined, send to worker 'wrk' only.
6844 /// If defined, the full path of the remote path will be rfile.
6845 /// If rfile = "cache" the file is copied to the remote cache instead of the sandbox
6846 /// (to copy to the cache on a different name use rfile = "cache:newname").
6847 /// The mask 'opt' is an or of ESendFileOpt:
6848 ///
6849 /// kAscii (0x0) if set true ascii file transfer is used
6850 /// kBinary (0x1) if set true binary file transfer is used
6851 /// kForce (0x2) if not set an attempt is done to find out
6852 /// whether the file really needs to be downloaded
6853 /// (a valid copy may already exist in the cache
6854 /// from a previous run); the bit is set by
6855 /// UploadPackage, since the check is done elsewhere.
6856 /// kForward (0x4) if set, ask server to forward the file to slave
6857 /// or submaster (meaningless for slave servers).
6858 /// kCpBin (0x8) Retrieve from the cache the binaries associated
6859 /// with the file
6860 /// kCp (0x10) Retrieve the files from the cache
6861 ///
6862 
6863 Int_t TProof::SendFile(const char *file, Int_t opt, const char *rfile, TSlave *wrk)
6864 {
6865  if (!IsValid()) return -1;
6866 
6867  // Use the active slaves list ...
6868  TList *slaves = (rfile && !strcmp(rfile, "cache")) ? fUniqueSlaves : fActiveSlaves;
6869  // ... or the specified slave, if any
6870  if (wrk) {
6871  slaves = new TList();
6872  slaves->Add(wrk);
6873  }
6874 
6875  if (slaves->GetSize() == 0) return 0;
6876 
6877 #ifndef R__WIN32
6878  Int_t fd = open(file, O_RDONLY);
6879 #else
6880  Int_t fd = open(file, O_RDONLY | O_BINARY);
6881 #endif
6882  if (fd < 0) {
6883  SysError("SendFile", "cannot open file %s", file);
6884  return -1;
6885  }
6886 
6887  // Get info about the file
6888  Long64_t size = -1;
6889  Long_t id, flags, modtime = 0;
6890  if (gSystem->GetPathInfo(file, &id, &size, &flags, &modtime) == 1) {
6891  Error("SendFile", "cannot stat file %s", file);
6892  close(fd);
6893  return -1;
6894  }
6895  if (size == 0) {
6896  Error("SendFile", "empty file %s", file);
6897  close(fd);
6898  return -1;
6899  }
6900 
6901  // Decode options
6902  Bool_t bin = (opt & kBinary) ? kTRUE : kFALSE;
6903  Bool_t force = (opt & kForce) ? kTRUE : kFALSE;
6904  Bool_t fw = (opt & kForward) ? kTRUE : kFALSE;
6905 
6906  // Copy options
6907  Int_t cpopt = 0;
6908  if ((opt & kCp)) cpopt |= kCp;
6909  if ((opt & kCpBin)) cpopt |= (kCp | kCpBin);
6910 
6911  const Int_t kMAXBUF = 32768; //16384 //65536;
6912  char buf[kMAXBUF];
6913  Int_t nsl = 0;
6914 
6915  TIter next(slaves);
6916  TSlave *sl;
6917  TString fnam(rfile);
6918  if (fnam == "cache") {
6919  fnam += TString::Format(":%s", gSystem->BaseName(file));
6920  } else if (fnam.IsNull()) {
6921  fnam = gSystem->BaseName(file);
6922  }
6923  // List on which we will collect the results
6924  fStatus = 0;
6925  while ((sl = (TSlave *)next())) {
6926  if (!sl->IsValid())
6927  continue;
6928 
6929  Bool_t sendto = force ? kTRUE : CheckFile(file, sl, modtime, cpopt);
6930  // Don't send the kPROOF_SENDFILE command to real slaves when sendto
6931  // is false. Masters might still need to send the file to newly added
6932  // slaves.
6933  PDB(kPackage,2) {
6934  const char *snd = (sl->fSlaveType == TSlave::kSlave && sendto) ? "" : "not";
6935  Info("SendFile", "%s sending file %s to: %s:%s (%d)", snd,
6936  file, sl->GetName(), sl->GetOrdinal(), sendto);
6937  }
6938  if (sl->fSlaveType == TSlave::kSlave && !sendto)
6939  continue;
6940  // The value of 'size' is used as flag remotely, so we need to
6941  // reset it to 0 if we are not going to send the file
6942  Long64_t siz = sendto ? size : 0;
6943  snprintf(buf, kMAXBUF, "%s %d %lld %d", fnam.Data(), bin, siz, fw);
6944  if (sl->GetSocket()->Send(buf, kPROOF_SENDFILE) == -1) {
6945  MarkBad(sl, "could not send kPROOF_SENDFILE request");
6946  continue;
6947  }
6948 
6949  if (sendto) {
6950 
6951  lseek(fd, 0, SEEK_SET);
6952 
6953  Int_t len;
6954  do {
6955  while ((len = read(fd, buf, kMAXBUF)) < 0 && TSystem::GetErrno() == EINTR)
6956  TSystem::ResetErrno();
6957 
6958  if (len < 0) {
6959  SysError("SendFile", "error reading from file %s", file);
6960  Interrupt(kSoftInterrupt, kActive);
6961  close(fd);
6962  return -1;
6963  }
6964 
6965  if (len > 0 && sl->GetSocket()->SendRaw(buf, len) == -1) {
6966  SysError("SendFile", "error writing to slave %s:%s (now offline)",
6967  sl->GetName(), sl->GetOrdinal());
6968  MarkBad(sl, "sendraw failure");
6969  sl = 0;
6970  break;
6971  }
6972 
6973  } while (len > 0);
6974 
6975  nsl++;
6976  }
6977  // Wait for the operation to be done
6978  if (sl)
6979  Collect(sl, fCollectTimeout, kPROOF_SENDFILE);
6980  }
6981 
6982  close(fd);
6983 
6984  // Cleanup temporary list, if any
6985  if (slaves != fActiveSlaves && slaves != fUniqueSlaves)
6986  SafeDelete(slaves);
6987 
6988  // We return failure is at least one unique worker failed
6989  return (fStatus != 0) ? -1 : nsl;
6990 }
6991 
6992 ////////////////////////////////////////////////////////////////////////////////
6993 /// Sends an object to master and workers and expect them to send back a
6994 /// message with the output of its TObject::Print(). Returns -1 on error, the
6995 /// number of workers that received the objects on success.
6996 
6997 Int_t TProof::Echo(const TObject *obj)
6998 {
6999  if (!IsValid() || !obj) return -1;
7000  TMessage mess(kPROOF_ECHO);
7001  mess.WriteObject(obj);
7002  return Broadcast(mess);
7003 }
7004 
7005 ////////////////////////////////////////////////////////////////////////////////
7006 /// Sends a string to master and workers and expect them to echo it back to
7007 /// the client via a message. It is a special case of the generic Echo()
7008 /// that works with TObjects. Returns -1 on error, the number of workers that
7009 /// received the message on success.
7010 
7011 Int_t TProof::Echo(const char *str)
7012 {
7013  TObjString *os = new TObjString(str);
7014  Int_t rv = Echo(os);
7015  delete os;
7016  return rv;
7017 }
7018 
7019 ////////////////////////////////////////////////////////////////////////////////
7020 /// Send object to master or slave servers. Returns number of slaves object
7021 /// was sent to, -1 in case of error.
7022 
7023 Int_t TProof::SendObject(const TObject *obj, ESlaves list)
7024 {
7025  if (!IsValid() || !obj) return -1;
7026 
7027  TMessage mess(kMESS_OBJECT);
7028 
7029  mess.WriteObject(obj);
7030  return Broadcast(mess, list);
7031 }
7032 
7033 ////////////////////////////////////////////////////////////////////////////////
7034 /// Send print command to master server. Returns number of slaves message
7035 /// was sent to. Returns -1 in case of error.
7036 
7037 Int_t TProof::SendPrint(Option_t *option)
7038 {
7039  if (!IsValid()) return -1;
7040 
7041  Broadcast(option, kPROOF_PRINT, kActive);
7042  return Collect(kActive, fCollectTimeout);
7043 }
7044 
7045 ////////////////////////////////////////////////////////////////////////////////
7046 /// Set server logging level.
7047 
7048 void TProof::SetLogLevel(Int_t level, UInt_t mask)
7049 {
7050  char str[32];
7051  fLogLevel = level;
7052  gProofDebugLevel = level;
7053  gProofDebugMask = (TProofDebug::EProofDebugMask) mask;
7054  snprintf(str, 32, "%d %u", level, mask);
7055  Broadcast(str, kPROOF_LOGLEVEL, kAll);
7056 }
7057 
7058 ////////////////////////////////////////////////////////////////////////////////
7059 /// Switch ON/OFF the real-time logging facility. When this option is
7060 /// ON, log messages from processing are sent back as they come, instead of
7061 /// being sent back at the end in one go. This may help debugging or monitoring
7062 /// in some cases, but, depending on the amount of log, it may have significant
7063 /// consequencies on the load over the network, so it must be used with care.
7064 
7065 void TProof::SetRealTimeLog(Bool_t on)
7066 {
7067  if (IsValid()) {
7068  TMessage mess(kPROOF_REALTIMELOG);
7069  mess << on;
7070  Broadcast(mess);
7071  } else {
7072  Warning("SetRealTimeLog","session is invalid - do nothing");
7073  }
7074 }
7075 
7076 ////////////////////////////////////////////////////////////////////////////////
7077 /// Tell PROOF how many slaves to use in parallel. If random is TRUE a random
7078 /// selection is done (if nodes is less than the available nodes).
7079 /// Returns the number of parallel slaves. Returns -1 in case of error.
7080 
7081 Int_t TProof::SetParallelSilent(Int_t nodes, Bool_t random)
7082 {
7083  if (!IsValid()) return -1;
7084 
7085  if (TestBit(TProof::kIsMaster)) {
7086  if (!fDynamicStartup) GoParallel(nodes, kFALSE, random);
7087  return SendCurrentState();
7088  } else {
7089  if (nodes < 0) {
7090  PDB(kGlobal,1) Info("SetParallelSilent", "request all nodes");
7091  } else {
7092  PDB(kGlobal,1) Info("SetParallelSilent", "request %d node%s", nodes,
7093  nodes == 1 ? "" : "s");
7094  }
7095  TMessage mess(kPROOF_PARALLEL);
7096  mess << nodes << random;
7097  Broadcast(mess);
7098  Collect(kActive, fCollectTimeout);
7099  Int_t n = GetParallel();
7100  PDB(kGlobal,1) Info("SetParallelSilent", "got %d node%s", n, n == 1 ? "" : "s");
7101  return n;
7102  }
7103 }
7104 
7105 ////////////////////////////////////////////////////////////////////////////////
7106 /// Tell PROOF how many slaves to use in parallel. Returns the number of
7107 /// parallel slaves. Returns -1 in case of error.
7108 
7109 Int_t TProof::SetParallel(Int_t nodes, Bool_t random)
7110 {
7111  // If delayed startup reset settings, if required
7112  if (fDynamicStartup && nodes < 0) {
7113  if (gSystem->Getenv("PROOF_NWORKERS")) gSystem->Unsetenv("PROOF_NWORKERS");
7114  }
7115 
7116  Int_t n = SetParallelSilent(nodes, random);
7117  if (TestBit(TProof::kIsClient)) {
7118  if (n < 1) {
7119  Printf("PROOF set to sequential mode");
7120  } else {
7121  TString subfix = (n == 1) ? "" : "s";
7122  if (random)
7123  subfix += ", randomly selected";
7124  Printf("PROOF set to parallel mode (%d worker%s)", n, subfix.Data());
7125  }
7126  } else if (fDynamicStartup && nodes >= 0) {
7127  if (gSystem->Getenv("PROOF_NWORKERS")) gSystem->Unsetenv("PROOF_NWORKERS");
7128  gSystem->Setenv("PROOF_NWORKERS", TString::Format("%d", nodes));
7129  }
7130  return n;
7131 }
7132 
7133 ////////////////////////////////////////////////////////////////////////////////
7134 /// Add nWorkersToAdd workers to current list of workers. This function is
7135 /// works on the master only, and only when an analysis is ongoing. A message
7136 /// is sent back to the client when we go "more" parallel.
7137 /// Returns -1 on error, number of total (not added!) workers on success.
7138 
7139 Int_t TProof::GoMoreParallel(Int_t nWorkersToAdd)
7140 {
7141  if (!IsValid() || !IsMaster() || IsIdle()) {
7142  Error("GoMoreParallel", "can't invoke here -- should not happen!");
7143  return -1;
7144  }
7145  if (!gProofServ && !IsLite()) {
7146  Error("GoMoreParallel", "no ProofServ available nor Lite -- should not happen!");
7147  return -1;
7148  }
7149 
7150  TSlave *sl = 0x0;
7151  TIter next( fSlaves );
7152  Int_t nAddedWorkers = 0;
7153 
7154  while (((nAddedWorkers < nWorkersToAdd) || (nWorkersToAdd == -1)) &&
7155  (( sl = dynamic_cast<TSlave *>( next() ) ))) {
7156 
7157  // If worker is of an invalid type, break everything: it should not happen!
7158  if ((sl->GetSlaveType() != TSlave::kSlave) &&
7159  (sl->GetSlaveType() != TSlave::kMaster)) {
7160  Error("GoMoreParallel", "TSlave is neither a Master nor a Slave: %s:%s",
7161  sl->GetName(), sl->GetOrdinal());
7162  R__ASSERT(0);
7163  }
7164 
7165  // Skip current worker if it is not a good candidate
7166  if ((!sl->IsValid()) || (fBadSlaves->FindObject(sl)) ||
7167  (strcmp("IGNORE", sl->GetImage()) == 0)) {
7168  PDB(kGlobal, 2)
7169  Info("GoMoreParallel", "Worker %s:%s won't be considered",
7170  sl->GetName(), sl->GetOrdinal());
7171  continue;
7172  }
7173 
7174  // Worker is good but it is already active: skip it
7175  if (fActiveSlaves->FindObject(sl)) {
7176  Info("GoMoreParallel", "Worker %s:%s is already active: skipping",
7177  sl->GetName(), sl->GetOrdinal());
7178  continue;
7179  }
7180 
7181  //
7182  // From here on: worker is a good candidate
7183  //
7184 
7185  if (sl->GetSlaveType() == TSlave::kSlave) {
7186  sl->SetStatus(TSlave::kActive);
7187  fActiveSlaves->Add(sl);
7188  fInactiveSlaves->Remove(sl);
7189  fActiveMonitor->Add(sl->GetSocket());
7190  nAddedWorkers++;
7191  PDB(kGlobal, 2)
7192  Info("GoMoreParallel", "Worker %s:%s marked as active!",
7193  sl->GetName(), sl->GetOrdinal());
7194  }
7195  else {
7196  // Can't add masters dynamically: this should not happen!
7197  Error("GoMoreParallel", "Dynamic addition of master is not supported");
7198  R__ASSERT(0);
7199  }
7200 
7201  } // end loop over all slaves
7202 
7203  // Get slave status (will set the slaves fWorkDir correctly)
7204  PDB(kGlobal, 3)
7205  Info("GoMoreParallel", "Will invoke AskStatistics() -- implies a Collect()");
7206  AskStatistics();
7207 
7208  // Find active slaves with unique image
7209  PDB(kGlobal, 3)
7210  Info("GoMoreParallel", "Will invoke FindUniqueSlaves()");
7211  FindUniqueSlaves();
7212 
7213  // Send new group-view to slaves
7214  PDB(kGlobal, 3)
7215  Info("GoMoreParallel", "Will invoke SendGroupView()");
7216  SendGroupView();
7217 
7218  PDB(kGlobal, 3)
7219  Info("GoMoreParallel", "Will invoke GetParallel()");
7220  Int_t nTotalWorkers = GetParallel();
7221 
7222  // Notify the client that we've got more workers, and print info on
7223  // Master's log as well
7224  TString s;
7225  s.Form("PROOF just went more parallel (%d additional worker%s, %d worker%s total)",
7226  nAddedWorkers, (nAddedWorkers == 1) ? "" : "s",
7227  nTotalWorkers, (nTotalWorkers == 1) ? "" : "s");
7228  if (gProofServ) gProofServ->SendAsynMessage(s);
7229  Info("GoMoreParallel", "%s", s.Data());
7230 
7231  return nTotalWorkers;
7232 }
7233 
7234 ////////////////////////////////////////////////////////////////////////////////
7235 /// Go in parallel mode with at most "nodes" slaves. Since the fSlaves
7236 /// list is sorted by slave performace the active list will contain first
7237 /// the most performant nodes. Returns the number of active slaves.
7238 /// If random is TRUE, and nodes is less than the number of available workers,
7239 /// a random selection is done.
7240 /// Returns -1 in case of error.
7241 
7242 Int_t TProof::GoParallel(Int_t nodes, Bool_t attach, Bool_t random)
7243 {
7244  if (!IsValid()) return -1;
7245 
7246  fActiveSlaves->Clear();
7247  fActiveMonitor->RemoveAll();
7248 
7249  // Prepare the list of candidates first.
7250  // Algorithm depends on random option.
7251  TSlave *sl = 0;
7252  TList *wlst = new TList;
7253  TIter nxt(fSlaves);
7254  fInactiveSlaves->Clear();
7255  while ((sl = (TSlave *)nxt())) {
7256  if (sl->IsValid() && !fBadSlaves->FindObject(sl)) {
7257  if (strcmp("IGNORE", sl->GetImage()) == 0) continue;
7258  if ((sl->GetSlaveType() != TSlave::kSlave) &&
7259  (sl->GetSlaveType() != TSlave::kMaster)) {
7260  Error("GoParallel", "TSlave is neither Master nor Slave");
7261  R__ASSERT(0);
7262  }
7263  // Good candidate
7264  wlst->Add(sl);
7265  // Set it inactive
7266  fInactiveSlaves->Add(sl);
7267  sl->SetStatus(TSlave::kInactive);
7268  }
7269  }
7270  Int_t nwrks = (nodes < 0 || nodes > wlst->GetSize()) ? wlst->GetSize() : nodes;
7271  int cnt = 0;
7272  fEndMaster = TestBit(TProof::kIsMaster) ? kTRUE : kFALSE;
7273  while (cnt < nwrks) {
7274  // Random choice, if requested
7275  if (random) {
7276  Int_t iwrk = (Int_t) (gRandom->Rndm() * wlst->GetSize());
7277  sl = (TSlave *) wlst->At(iwrk);
7278  } else {
7279  // The first available
7280  sl = (TSlave *) wlst->First();
7281  }
7282  if (!sl) {
7283  Error("GoParallel", "attaching to candidate!");
7284  break;
7285  }
7286  // Remove from the list
7287  wlst->Remove(sl);
7288 
7289  Int_t slavenodes = 0;
7290  if (sl->GetSlaveType() == TSlave::kSlave) {
7291  sl->SetStatus(TSlave::kActive);
7292  fActiveSlaves->Add(sl);
7293  fInactiveSlaves->Remove(sl);
7294  fActiveMonitor->Add(sl->GetSocket());
7295  slavenodes = 1;
7296  } else if (sl->GetSlaveType() == TSlave::kMaster) {
7297  fEndMaster = kFALSE;
7298  TMessage mess(kPROOF_PARALLEL);
7299  if (!attach) {
7300  Int_t nn = (nodes < 0) ? -1 : nodes-cnt;
7301  mess << nn;
7302  } else {
7303  // To get the number of slaves
7304  mess.SetWhat(kPROOF_LOGFILE);
7305  mess << -1 << -1;
7306  }
7307  if (sl->GetSocket()->Send(mess) == -1) {
7308  MarkBad(sl, "could not send kPROOF_PARALLEL or kPROOF_LOGFILE request");
7309  slavenodes = 0;
7310  } else {
7311  Collect(sl, fCollectTimeout);
7312  if (sl->IsValid()) {
7313  sl->SetStatus(TSlave::kActive);
7314  fActiveSlaves->Add(sl);
7315  fInactiveSlaves->Remove(sl);
7316  fActiveMonitor->Add(sl->GetSocket());
7317  if (sl->GetParallel() > 0) {
7318  slavenodes = sl->GetParallel();
7319  } else {
7320  // Sequential mode: the master acts as a worker
7321  slavenodes = 1;
7322  }
7323  } else {
7324  MarkBad(sl, "collect failed after kPROOF_PARALLEL or kPROOF_LOGFILE request");
7325  slavenodes = 0;
7326  }
7327  }
7328  }
7329  // 'slavenodes' may be different than 1 in multimaster setups
7330  cnt += slavenodes;
7331  }
7332 
7333  // Cleanup list
7334  wlst->SetOwner(0);
7335  SafeDelete(wlst);
7336 
7337  // Get slave status (will set the slaves fWorkDir correctly)
7338  AskStatistics();
7339 
7340  // Find active slaves with unique image
7341  FindUniqueSlaves();
7342 
7343  // Send new group-view to slaves
7344  if (!attach)
7345  SendGroupView();
7346 
7347  Int_t n = GetParallel();
7348 
7349  if (TestBit(TProof::kIsClient)) {
7350  if (n < 1)
7351  printf("PROOF set to sequential mode\n");
7352  else
7353  printf("PROOF set to parallel mode (%d worker%s)\n",
7354  n, n == 1 ? "" : "s");
7355  }
7356 
7357  PDB(kGlobal,1) Info("GoParallel", "got %d node%s", n, n == 1 ? "" : "s");
7358  return n;
7359 }
7360 
7361 ////////////////////////////////////////////////////////////////////////////////
7362 /// List contents of the data directory in the sandbox.
7363 /// This is the place where files produced by the client queries are kept
7364 
7365 void TProof::ShowData()
7366 {
7367  if (!IsValid() || !fManager) return;
7368 
7369  // This is run via the manager
7370  fManager->Find("~/data", "-type f", "all");
7371 }
7372 
7373 ////////////////////////////////////////////////////////////////////////////////
7374 /// Remove files for the data directory.
7375 /// The option 'what' can take the values:
7376 /// kPurge remove all files and directories under '~/data'
7377 /// kUnregistered remove only files not in registered datasets (default)
7378 /// kDataset remove files belonging to dataset 'dsname'
7379 /// User is prompt for confirmation, unless kForceClear is ORed with the option
7380 
7381 void TProof::ClearData(UInt_t what, const char *dsname)
7382 {
7383  if (!IsValid() || !fManager) return;
7384 
7385  // Check whether we need to prompt
7386  TString prompt, a("Y");
7387  Bool_t force = (what & kForceClear) ? kTRUE : kFALSE;
7388  Bool_t doask = (!force && IsTty()) ? kTRUE : kFALSE;
7389 
7390  // If all just send the request
7391  if ((what & TProof::kPurge)) {
7392  // Prompt, if requested
7393  if (doask && !Prompt("Do you really want to remove all data files")) return;
7394  if (fManager->Rm("~/data/*", "-rf", "all") < 0)
7395  Warning("ClearData", "problems purging data directory");
7396  return;
7397  } else if ((what & TProof::kDataset)) {
7398  // We must have got a name
7399  if (!dsname || strlen(dsname) <= 0) {
7400  Error("ClearData", "dataset name mandatory when removing a full dataset");
7401  return;
7402  }
7403  // Check if the dataset is registered
7404  if (!ExistsDataSet(dsname)) {
7405  Error("ClearData", "dataset '%s' does not exists", dsname);
7406  return;
7407  }
7408  // Get the file content
7409  TFileCollection *fc = GetDataSet(dsname);
7410  if (!fc) {
7411  Error("ClearData", "could not retrieve info about dataset '%s'", dsname);
7412  return;
7413  }
7414  // Prompt, if requested
7415  TString pmpt = TString::Format("Do you really want to remove all data files"
7416  " of dataset '%s'", dsname);
7417  if (doask && !Prompt(pmpt.Data())) return;
7418 
7419  // Loop through the files
7420  Bool_t rmds = kTRUE;
7421  TIter nxf(fc->GetList());
7422  TFileInfo *fi = 0;
7423  Int_t rfiles = 0, nfiles = fc->GetList()->GetSize();
7424  while ((fi = (TFileInfo *) nxf())) {
7425  // Fill the host info
7426  TString host, file;
7427  // Take info from the current url
7428  if (!(fi->GetFirstUrl())) {
7429  Error("ClearData", "GetFirstUrl() returns NULL for '%s' - skipping",
7430  fi->GetName());
7431  continue;
7432  }
7433  TUrl uf(*(fi->GetFirstUrl()));
7434  file = uf.GetFile();
7435  host = uf.GetHost();
7436  // Now search for any "file:" url
7437  Int_t nurl = fi->GetNUrls();
7438  fi->ResetUrl();
7439  TUrl *up = 0;
7440  while (nurl-- && fi->NextUrl()) {
7441  up = fi->GetCurrentUrl();
7442  if (!strcmp(up->GetProtocol(), "file")) {
7443  TString opt(up->GetOptions());
7444  if (opt.BeginsWith("node=")) {
7445  host=opt;
7446  host.ReplaceAll("node=","");
7447  file = up->GetFile();
7448  break;
7449  }
7450  }
7451  }
7452  // Issue a remove request now
7453  if (fManager->Rm(file.Data(), "-f", host.Data()) != 0) {
7454  Error("ClearData", "problems removing '%s'", file.Data());
7455  // Some files not removed: keep the meta info about this dataset
7456  rmds = kFALSE;
7457  }
7458  rfiles++;
7459  ClearDataProgress(rfiles, nfiles);
7460  }
7461  fprintf(stderr, "\n");
7462  if (rmds) {
7463  // All files were removed successfully: remove also the dataset meta info
7464  RemoveDataSet(dsname);
7465  }
7466  } else if (what & TProof::kUnregistered) {
7467 
7468  // Get the existing files
7469  TString outtmp("ProofClearData_");
7470  FILE *ftmp = gSystem->TempFileName(outtmp);
7471  if (!ftmp) {
7472  Error("ClearData", "cannot create temp file for logs");
7473  return;
7474  }
7475  fclose(ftmp);
7476  RedirectHandle_t h;
7477  gSystem->RedirectOutput(outtmp.Data(), "w", &h);
7478  ShowData();
7479  gSystem->RedirectOutput(0, 0, &h);
7480  // Parse the output file now
7481  std::ifstream in;
7482  in.open(outtmp.Data());
7483  if (!in.is_open()) {
7484  Error("ClearData", "could not open temp file for logs: %s", outtmp.Data());
7485  gSystem->Unlink(outtmp);
7486  return;
7487  }
7488  // Go through
7489  Int_t nfiles = 0;
7490  TMap *afmap = new TMap;
7491  TString line, host, file;
7492  Int_t from = 0;
7493  while (in.good()) {
7494  line.ReadLine(in);
7495  if (line.IsNull()) continue;
7496  while (line.EndsWith("\n")) { line.Strip(TString::kTrailing, '\n'); }
7497  from = 0;
7498  host = "";
7499  if (!line.Tokenize(host, from, "| ")) continue;
7500  file = "";
7501  if (!line.Tokenize(file, from, "| ")) continue;
7502  if (!host.IsNull() && !file.IsNull()) {
7503  TList *fl = (TList *) afmap->GetValue(host.Data());
7504  if (!fl) {
7505  fl = new TList();
7506  fl->SetName(host);
7507  afmap->Add(new TObjString(host), fl);
7508  }
7509  fl->Add(new TObjString(file));
7510  nfiles++;
7511  PDB(kDataset,2)
7512  Info("ClearData", "added info for: h:%s, f:%s", host.Data(), file.Data());
7513  } else {
7514  Warning("ClearData", "found incomplete line: '%s'", line.Data());
7515  }
7516  }
7517  // Close and remove the file
7518  in.close();
7519  gSystem->Unlink(outtmp);
7520 
7521  // Get registered data files
7522  TString sel = TString::Format("/%s/%s/", GetGroup(), GetUser());
7523  TMap *fcmap = GetDataSets(sel);
7524  if (!fcmap || (fcmap && fcmap->GetSize() <= 0)) {
7525  PDB(kDataset,1)
7526  Warning("ClearData", "no dataset beloning to '%s'", sel.Data());
7527  SafeDelete(fcmap);
7528  }
7529 
7530  // Go thorugh and prepare the lists per node
7531  TString opt;
7532  TObjString *os = 0;
7533  if (fcmap) {
7534  TIter nxfc(fcmap);
7535  while ((os = (TObjString *) nxfc())) {
7536  TFileCollection *fc = 0;
7537  if ((fc = (TFileCollection *) fcmap->GetValue(os))) {
7538  TFileInfo *fi = 0;
7539  TIter nxfi(fc->GetList());
7540  while ((fi = (TFileInfo *) nxfi())) {
7541  // Get special "file:" url
7542  fi->ResetUrl();
7543  Int_t nurl = fi->GetNUrls();
7544  TUrl *up = 0;
7545  while (nurl-- && fi->NextUrl()) {
7546  up = fi->GetCurrentUrl();
7547  if (!strcmp(up->GetProtocol(), "file")) {
7548  opt = up->GetOptions();
7549  if (opt.BeginsWith("node=")) {
7550  host=opt;
7551  host.ReplaceAll("node=","");
7552  file = up->GetFile();
7553  PDB(kDataset,2)
7554  Info("ClearData", "found: host: %s, file: %s", host.Data(), file.Data());
7555  // Remove this from the full list, if there
7556  TList *fl = (TList *) afmap->GetValue(host.Data());
7557  if (fl) {
7558  TObjString *fn = (TObjString *) fl->FindObject(file.Data());
7559  if (fn) {
7560  fl->Remove(fn);
7561  SafeDelete(fn);
7562  nfiles--;
7563  } else {
7564  Warning("ClearData",
7565  "registered file '%s' not found in the full list!",
7566  file.Data());
7567  }
7568  }
7569  break;
7570  }
7571  }
7572  }
7573  }
7574  }
7575  }
7576  // Clean up the the received map
7577  if (fcmap) fcmap->SetOwner(kTRUE);
7578  SafeDelete(fcmap);
7579  }
7580  // List of the files to be removed
7581  Info("ClearData", "%d unregistered files to be removed:", nfiles);
7582  afmap->Print();
7583  // Prompt, if requested
7584  TString pmpt = TString::Format("Do you really want to remove all %d"
7585  " unregistered data files", nfiles);
7586  if (doask && !Prompt(pmpt.Data())) return;
7587 
7588  // Remove one by one; we may implement a bloc remove in the future
7589  Int_t rfiles = 0;
7590  TIter nxls(afmap);
7591  while ((os = (TObjString *) nxls())) {
7592  TList *fl = 0;
7593  if ((fl = (TList *) afmap->GetValue(os))) {
7594  TIter nxf(fl);
7595  TObjString *fn = 0;
7596  while ((fn = (TObjString *) nxf())) {
7597  // Issue a remove request now
7598  if (fManager->Rm(fn->GetName(), "-f", os->GetName()) != 0) {
7599  Error("ClearData", "problems removing '%s' on host '%s'",
7600  fn->GetName(), os->GetName());
7601  }
7602  rfiles++;
7603  ClearDataProgress(rfiles, nfiles);
7604  }
7605  }
7606  }
7607  fprintf(stderr, "\n");
7608  // Final cleanup
7609  afmap->SetOwner(kTRUE);
7610  SafeDelete(afmap);
7611  }
7612 }
7613 
7614 ////////////////////////////////////////////////////////////////////////////////
7615 /// Prompt the question 'p' requiring an answer y,Y,n,N
7616 /// Return kTRUE is the answer was y or Y, kFALSE in all other cases.
7617 
7618 Bool_t TProof::Prompt(const char *p)
7619 {
7620  TString pp(p);
7621  if (!pp.Contains("?")) pp += "?";
7622  if (!pp.Contains("[y/N]")) pp += " [y/N]";
7623  TString a = Getline(pp.Data());
7624  if (a != "\n" && a[0] != 'y' && a[0] != 'Y' && a[0] != 'n' && a[0] != 'N') {
7625  Printf("Please answer y, Y, n or N");
7626  // Unclear answer: assume negative
7627  return kFALSE;
7628  } else if (a == "\n" || a[0] == 'n' || a[0] == 'N') {
7629  // Explicitly Negative answer
7630  return kFALSE;
7631  }
7632  // Explicitly Positive answer
7633  return kTRUE;
7634 }
7635 
7636 ////////////////////////////////////////////////////////////////////////////////
7637 /// Progress bar for clear data
7638 
7639 void TProof::ClearDataProgress(Int_t r, Int_t t)
7640 {
7641  fprintf(stderr, "[TProof::ClearData] Total %5d files\t|", t);
7642  for (Int_t l = 0; l < 20; l++) {
7643  if (r > 0 && t > 0) {
7644  if (l < 20*r/t)
7645  fprintf(stderr, "=");
7646  else if (l == 20*r/t)
7647  fprintf(stderr, ">");
7648  else if (l > 20*r/t)
7649  fprintf(stderr, ".");
7650  } else
7651  fprintf(stderr, "=");
7652  }
7653  fprintf(stderr, "| %.02f %% \r", 100.0*(t ? (r/t) : 1));
7654 }
7655 
7656 ////////////////////////////////////////////////////////////////////////////////
7657 /// List contents of file cache. If all is true show all caches also on
7658 /// slaves. If everything is ok all caches are to be the same.
7659 
7660 void TProof::ShowCache(Bool_t all)
7661 {
7662  if (!IsValid()) return;
7663 
7664  TMessage mess(kPROOF_CACHE);
7665  mess << Int_t(kShowCache) << all;
7666  Broadcast(mess, kUnique);
7667 
7668  if (all) {
7669  TMessage mess2(kPROOF_CACHE);
7670  mess2 << Int_t(kShowSubCache) << all;
7671  Broadcast(mess2, fNonUniqueMasters);
7672 
7673  Collect(kAllUnique, fCollectTimeout);
7674  } else {
7675  Collect(kUnique, fCollectTimeout);
7676  }
7677 }
7678 
7679 ////////////////////////////////////////////////////////////////////////////////
7680 /// Remove file from all file caches. If file is 0 or "" or "*", remove all
7681 /// the files
7682 
7683 void TProof::ClearCache(const char *file)
7684 {
7685  if (!IsValid()) return;
7686 
7687  TMessage mess(kPROOF_CACHE);
7688  mess << Int_t(kClearCache) << TString(file);
7689  Broadcast(mess, kUnique);
7690 
7691  TMessage mess2(kPROOF_CACHE);
7692  mess2 << Int_t(kClearSubCache) << TString(file);
7693  Broadcast(mess2, fNonUniqueMasters);
7694 
7695  Collect(kAllUnique);
7696 
7697  // clear file map so files get send again to remote nodes
7698  fFileMap.clear();
7699 }
7700 
7701 ////////////////////////////////////////////////////////////////////////////////
7702 /// Exec system command 'cmd'. If fdout > -1, append the output to fdout.
7703 
7704 void TProof::SystemCmd(const char *cmd, Int_t fdout)
7705 {
7706  if (fdout < 0) {
7707  // Exec directly the command
7708  gSystem->Exec(cmd);
7709  } else {
7710  // Exec via a pipe
7711  FILE *fin = gSystem->OpenPipe(cmd, "r");
7712  if (fin) {
7713  // Now we go
7714  char line[2048];
7715  while (fgets(line, 2048, fin)) {
7716  Int_t r = strlen(line);
7717  if (r > 0) {
7718  if (write(fdout, line, r) < 0) {
7719  ::Warning("TProof::SystemCmd",
7720  "errno %d writing to file descriptor %d",
7721  TSystem::GetErrno(), fdout);
7722  }
7723  } else {
7724  // Done
7725  break;
7726  }
7727  }
7728  gSystem->ClosePipe(fin);
7729  }
7730  }
7731 }
7732 
7733 ////////////////////////////////////////////////////////////////////////////////
7734 /// List contents of package directory. If all is true show all package
7735 /// directories also on slaves. If everything is ok all package directories
7736 /// should be the same. If redir is kTRUE the result is redirected to the log
7737 /// file (option available for internal actions).
7738 
7739 void TProof::ShowPackages(Bool_t all, Bool_t redirlog)
7740 {
7741  if (!IsValid()) return;
7742 
7743  Bool_t oldredir = fRedirLog;
7744  if (redirlog) fRedirLog = kTRUE;
7745 
7746  // Active logging unit
7747  FILE *fout = (fRedirLog) ? fLogFileW : stdout;
7748  if (!fout) {
7749  Warning("ShowPackages", "file descriptor for outputs undefined (%p):"
7750  " will not log msgs", fout);
7751  return;
7752  }
7753  lseek(fileno(fout), (off_t) 0, SEEK_END);
7754 
7755  if (TestBit(TProof::kIsClient)) {
7756  fPackMgr->Show();
7757  }
7758 
7759  // Nothing more to do if we are a Lite-session
7760  if (IsLite()) {
7761  fRedirLog = oldredir;
7762  return;
7763  }
7764 
7765  TMessage mess(kPROOF_CACHE);
7766  mess << Int_t(kShowPackages) << all;
7767  Broadcast(mess, kUnique);
7768 
7769  if (all) {
7770  TMessage mess2(kPROOF_CACHE);
7771  mess2 << Int_t(kShowSubPackages) << all;
7772  Broadcast(mess2, fNonUniqueMasters);
7773 
7774  Collect(kAllUnique, fCollectTimeout);
7775  } else {
7776  Collect(kUnique, fCollectTimeout);
7777  }
7778  // Restore logging option
7779  fRedirLog = oldredir;
7780 }
7781 
7782 ////////////////////////////////////////////////////////////////////////////////
7783 /// List which packages are enabled. If all is true show enabled packages
7784 /// for all active slaves. If everything is ok all active slaves should
7785 /// have the same packages enabled.
7786 
7787 void TProof::ShowEnabledPackages(Bool_t all)
7788 {
7789  if (!IsValid()) return;
7790 
7791  if (TestBit(TProof::kIsClient)) {
7792  fPackMgr->ShowEnabled(TString::Format("*** Enabled packages on client on %s\n",
7793  gSystem->HostName()));
7794  }
7795 
7796  // Nothing more to do if we are a Lite-session
7797  if (IsLite()) return;
7798 
7799  TMessage mess(kPROOF_CACHE);
7800  mess << Int_t(kShowEnabledPackages) << all;
7801  Broadcast(mess);
7802  Collect(kActive, fCollectTimeout);
7803 }
7804 
7805 ////////////////////////////////////////////////////////////////////////////////
7806 /// Remove all packages.
7807 /// Returns 0 in case of success and -1 in case of error.
7808 
7809 Int_t TProof::ClearPackages()
7810 {
7811  if (!IsValid()) return -1;
7812 
7813  if (UnloadPackages() == -1)
7814  return -1;
7815 
7816  if (DisablePackages() == -1)
7817  return -1;
7818 
7819  return fStatus;
7820 }
7821 
7822 ////////////////////////////////////////////////////////////////////////////////
7823 /// Remove a specific package.
7824 /// Returns 0 in case of success and -1 in case of error.
7825 
7826 Int_t TProof::ClearPackage(const char *package)
7827 {
7828  if (!IsValid()) return -1;
7829 
7830  if (!package || !package[0]) {
7831  Error("ClearPackage", "need to specify a package name");
7832  return -1;
7833  }
7834 
7835  // if name, erroneously, is a par pathname strip off .par and path
7836  TString pac = package;
7837  if (pac.EndsWith(".par"))
7838  pac.Remove(pac.Length()-4);
7839  pac = gSystem->BaseName(pac);
7840 
7841  if (UnloadPackage(pac) == -1)
7842  return -1;
7843 
7844  if (DisablePackage(pac) == -1)
7845  return -1;
7846 
7847  return fStatus;
7848 }
7849 
7850 ////////////////////////////////////////////////////////////////////////////////
7851 /// Remove a specific package.
7852 /// Returns 0 in case of success and -1 in case of error.
7853 
7854 Int_t TProof::DisablePackage(const char *pack)
7855 {
7856  if (!IsValid()) return -1;
7857 
7858  if (!pack || strlen(pack) <= 0) {
7859  Error("DisablePackage", "need to specify a package name");
7860  return -1;
7861  }
7862 
7863  // if name, erroneously, is a par pathname strip off .par and path
7864  TString pac = pack;
7865  if (pac.EndsWith(".par"))
7866  pac.Remove(pac.Length()-4);
7867  pac = gSystem->BaseName(pac);
7868 
7869  if (fPackMgr->Remove(pack) < 0)
7870  Warning("DisablePackage", "problem removing locally package '%s'", pack);
7871 
7872  // Nothing more to do if we are a Lite-session
7873  if (IsLite()) return 0;
7874 
7875  Int_t st = -1;
7876  Bool_t done = kFALSE;
7877  if (fManager) {
7878  // Try to do it via XROOTD (new way)
7879  TString path;
7880  path.Form("~/packages/%s", pack);
7881  if (fManager->Rm(path, "-rf", "all") != -1) {
7882  path.Append(".par");
7883  if (fManager->Rm(path, "-f", "all") != -1) {
7884  done = kTRUE;
7885  st = 0;
7886  }
7887  }
7888  }
7889  if (!done) {
7890  // Try via TProofServ (old way)
7891  TMessage mess(kPROOF_CACHE);
7892  mess << Int_t(kDisablePackage) << pac;
7893  Broadcast(mess, kUnique);
7894 
7895  TMessage mess2(kPROOF_CACHE);
7896  mess2 << Int_t(kDisableSubPackage) << pac;
7897  Broadcast(mess2, fNonUniqueMasters);
7898 
7899  Collect(kAllUnique);
7900  st = fStatus;
7901  }
7902 
7903  // Done
7904  return st;
7905 }
7906 
7907 ////////////////////////////////////////////////////////////////////////////////
7908 /// Remove all packages.
7909 /// Returns 0 in case of success and -1 in case of error.
7910 
7911 Int_t TProof::DisablePackages()
7912 {
7913  if (!IsValid()) return -1;
7914 
7915  // remove all packages on client
7916  if (fPackMgr->Remove(nullptr) < 0)
7917  Warning("DisablePackages", "problem removing packages locally");
7918 
7919  // Nothing more to do if we are a Lite-session
7920  if (IsLite()) return 0;
7921 
7922  Int_t st = -1;
7923  Bool_t done = kFALSE;
7924  if (fManager) {
7925  // Try to do it via XROOTD (new way)
7926  if (fManager->Rm("~/packages/*", "-rf", "all") != -1) {
7927  done = kTRUE;
7928  st = 0;
7929  }
7930  }
7931  if (!done) {
7932 
7933  TMessage mess(kPROOF_CACHE);
7934  mess << Int_t(kDisablePackages);
7935  Broadcast(mess, kUnique);
7936 
7937  TMessage mess2(kPROOF_CACHE);
7938  mess2 << Int_t(kDisableSubPackages);
7939  Broadcast(mess2, fNonUniqueMasters);
7940 
7941  Collect(kAllUnique);
7942  st = fStatus;
7943  }
7944 
7945  // Done
7946  return st;
7947 }
7948 
7949 ////////////////////////////////////////////////////////////////////////////////
7950 /// Build specified package. Executes the PROOF-INF/BUILD.sh
7951 /// script if it exists on all unique nodes. If opt is kBuildOnSlavesNoWait
7952 /// then submit build command to slaves, but don't wait
7953 /// for results. If opt is kCollectBuildResults then collect result
7954 /// from slaves. To be used on the master.
7955 /// If opt = kBuildAll (default) then submit and wait for results
7956 /// (to be used on the client).
7957 /// Returns 0 in case of success and -1 in case of error.
7958 
7959 Int_t TProof::BuildPackage(const char *package,
7960  EBuildPackageOpt opt, Int_t chkveropt, TList *workers)
7961 {
7962  if (!IsValid()) return -1;
7963 
7964  if (!package || !package[0]) {
7965  Error("BuildPackage", "need to specify a package name");
7966  return -1;
7967  }
7968 
7969  // if name, erroneously, is a par pathname strip off .par and path
7970  TString pac = package;
7971  if (pac.EndsWith(".par"))
7972  pac.Remove(pac.Length()-4);
7973  pac = gSystem->BaseName(pac);
7974 
7975  Bool_t buildOnClient = kTRUE;
7976  if (opt == kDontBuildOnClient) {
7977  buildOnClient = kFALSE;
7978  opt = kBuildAll;
7979  }
7980  // Prepare the local package
7981  TString pdir;
7982  Int_t st = 0;
7983 
7984  if (opt <= kBuildAll && (!IsLite() || !buildOnClient)) {
7985  if (workers) {
7986  TMessage mess(kPROOF_CACHE);
7987  mess << Int_t(kBuildPackage) << pac << chkveropt;
7988  Broadcast(mess, workers);
7989 
7990  } else {
7991  TMessage mess(kPROOF_CACHE);
7992  mess << Int_t(kBuildPackage) << pac << chkveropt;
7993  Broadcast(mess, kUnique);
7994 
7995  TMessage mess2(kPROOF_CACHE);
7996  mess2 << Int_t(kBuildSubPackage) << pac << chkveropt;
7997  Broadcast(mess2, fNonUniqueMasters);
7998  }
7999  }
8000 
8001  if (opt >= kBuildAll) {
8002  // by first forwarding the build commands to the master and slaves
8003  // and only then building locally we build in parallel
8004  if (buildOnClient) {
8005  st = fPackMgr->Build(pac, chkveropt);
8006  }
8007 
8008 
8009  fStatus = 0;
8010  if (!IsLite() || !buildOnClient) {
8011 
8012  // On the master, workers that fail are deactivated
8013  // Bool_t deactivateOnFailure = (IsMaster()) ? kTRUE : kFALSE;
8014  if (workers) {
8015 // Collect(workers, -1, -1, deactivateOnFailure);
8016  Collect(workers);
8017  } else {
8018  Collect(kAllUnique);
8019  }
8020  }
8021 
8022  if (fStatus < 0 || st < 0)
8023  return -1;
8024  }
8025 
8026  return 0;
8027 }
8028 
8029 ////////////////////////////////////////////////////////////////////////////////
8030 /// Load specified package. Executes the PROOF-INF/SETUP.C script
8031 /// on all active nodes. If notOnClient = true, don't load package
8032 /// on the client. The default is to load the package also on the client.
8033 /// The argument 'loadopts' specify a list of objects to be passed to the SETUP.
8034 /// The objects in the list must be streamable; the SETUP macro will be executed
8035 /// like this: SETUP.C(loadopts).
8036 /// Returns 0 in case of success and -1 in case of error.
8037 
8038 Int_t TProof::LoadPackage(const char *package, Bool_t notOnClient,
8039  TList *loadopts, TList *workers)
8040 {
8041  if (!IsValid()) return -1;
8042 
8043  if (!package || !package[0]) {
8044  Error("LoadPackage", "need to specify a package name");
8045  return -1;
8046  }
8047 
8048  // if name, erroneously, is a par pathname strip off .par and path
8049  TString pac = package;
8050  if (pac.EndsWith(".par"))
8051  pac.Remove(pac.Length()-4);
8052  pac = gSystem->BaseName(pac);
8053 
8054  if (!notOnClient && TestBit(TProof::kIsClient))
8055  if (fPackMgr->Load(package, loadopts) == -1) return -1;
8056 
8057  TMessage mess(kPROOF_CACHE);
8058  mess << Int_t(kLoadPackage) << pac;
8059  if (loadopts) mess << loadopts;
8060 
8061  // On the master, workers that fail are deactivated
8062  Bool_t deactivateOnFailure = (IsMaster()) ? kTRUE : kFALSE;
8063 
8064  Bool_t doCollect = (fDynamicStartup && !IsIdle()) ? kFALSE : kTRUE;
8065 
8066  if (workers) {
8067  PDB(kPackage, 3)
8068  Info("LoadPackage", "Sending load message to selected workers only");
8069  Broadcast(mess, workers);
8070  if (doCollect) Collect(workers, -1, -1, deactivateOnFailure);
8071  } else {
8072  Broadcast(mess);
8073  Collect(kActive, -1, -1, deactivateOnFailure);
8074  }
8075 
8076  return fStatus;
8077 }
8078 
8079 ////////////////////////////////////////////////////////////////////////////////
8080 /// Unload specified package.
8081 /// Returns 0 in case of success and -1 in case of error.
8082 
8083 Int_t TProof::UnloadPackage(const char *package)
8084 {
8085  if (!IsValid()) return -1;
8086 
8087  if (!package || !package[0]) {
8088  Error("UnloadPackage", "need to specify a package name");
8089  return -1;
8090  }
8091 
8092  // if name, erroneously, is a par pathname strip off .par and path
8093  TString pac = package;
8094  if (pac.EndsWith(".par"))
8095  pac.Remove(pac.Length()-4);
8096  pac = gSystem->BaseName(pac);
8097 
8098  if (fPackMgr->Unload(package) < 0)
8099  Warning("UnloadPackage", "unable to remove symlink to %s", package);
8100 
8101  // Nothing more to do if we are a Lite-session
8102  if (IsLite()) return 0;
8103 
8104  TMessage mess(kPROOF_CACHE);
8105  mess << Int_t(kUnloadPackage) << pac;
8106  Broadcast(mess);
8107  Collect();
8108 
8109  return fStatus;
8110 }
8111 
8112 ////////////////////////////////////////////////////////////////////////////////
8113 /// Unload all packages.
8114 /// Returns 0 in case of success and -1 in case of error.
8115 
8116 Int_t TProof::UnloadPackages()
8117 {
8118  if (!IsValid()) return -1;
8119 
8120  if (TestBit(TProof::kIsClient)) {
8121  if (fPackMgr->Unload(0) < 0) return -1;
8122  }
8123 
8124  // Nothing more to do if we are a Lite-session
8125  if (IsLite()) return 0;
8126 
8127  TMessage mess(kPROOF_CACHE);
8128  mess << Int_t(kUnloadPackages);
8129  Broadcast(mess);
8130  Collect();
8131 
8132  return fStatus;
8133 }
8134 
8135 ////////////////////////////////////////////////////////////////////////////////
8136 /// Enable specified package. Executes the PROOF-INF/BUILD.sh
8137 /// script if it exists followed by the PROOF-INF/SETUP.C script.
8138 /// In case notOnClient = true, don't enable the package on the client.
8139 /// The default is to enable packages also on the client.
8140 /// If specified, enables packages only on the specified workers.
8141 /// Returns 0 in case of success and -1 in case of error.
8142 /// Provided for backward compatibility.
8143 
8144 Int_t TProof::EnablePackage(const char *package, Bool_t notOnClient,
8145  TList *workers)
8146 {
8147  return EnablePackage(package, (TList *)0, notOnClient, workers);
8148 }
8149 
8150 ////////////////////////////////////////////////////////////////////////////////
8151 /// Enable specified package. Executes the PROOF-INF/BUILD.sh
8152 /// script if it exists followed by the PROOF-INF/SETUP.C script.
8153 /// In case notOnClient = true, don't enable the package on the client.
8154 /// The default is to enable packages also on the client.
8155 /// It is is possible to specify options for the loading step via 'loadopts';
8156 /// the string will be passed passed as argument to SETUP.
8157 /// Special option 'chkv=<o>' (or 'checkversion=<o>') can be used to control
8158 /// plugin version checking during building: possible choices are:
8159 /// off no check; failure may occur at loading
8160 /// on check ROOT version [default]
8161 /// svn check ROOT version and Git commit SHA1.
8162 /// (Use ';', ' ' or '|' to separate 'chkv=<o>' from the rest.)
8163 /// If specified, enables packages only on the specified workers.
8164 /// Returns 0 in case of success and -1 in case of error.
8165 
8166 Int_t TProof::EnablePackage(const char *package, const char *loadopts,
8167  Bool_t notOnClient, TList *workers)
8168 {
8169  TList *optls = 0;
8170  if (loadopts && strlen(loadopts)) {
8171  if (fProtocol > 28) {
8172  TObjString *os = new TObjString(loadopts);
8173  // Filter out 'checkversion=off|on|svn' or 'chkv=...'
8174  os->String().ReplaceAll("checkversion=", "chkv=");
8175  Ssiz_t fcv = kNPOS, lcv = kNPOS;
8176  if ((fcv = os->String().Index("chkv=")) != kNPOS) {
8177  TRegexp re("[; |]");
8178  if ((lcv = os->String().Index(re, fcv)) == kNPOS) {
8179  lcv = os->String().Length();
8180  }
8181  TString ocv = os->String()(fcv, lcv - fcv);
8182  Int_t cvopt = -1;
8183  if (ocv.EndsWith("=off") || ocv.EndsWith("=0"))
8184  cvopt = (Int_t) TPackMgr::kDontCheck;
8185  else if (ocv.EndsWith("=on") || ocv.EndsWith("=1"))
8186  cvopt = (Int_t) TPackMgr::kCheckROOT;
8187  else
8188  Warning("EnablePackage", "'checkversion' option unknown from argument: '%s' - ignored", ocv.Data());
8189  if (cvopt > -1) {
8190  if (gDebug > 0)
8191  Info("EnablePackage", "setting check version option from argument: %d", cvopt);
8192  optls = new TList;
8193  optls->Add(new TParameter<Int_t>("PROOF_Package_CheckVersion", (Int_t) cvopt));
8194  // Remove the special option from; we leave a separator if there were two (one before and one after)
8195  if (lcv != kNPOS && fcv == 0) ocv += os->String()[lcv];
8196  if (fcv > 0 && os->String().Index(re, fcv - 1) == fcv - 1) os->String().Remove(fcv - 1, 1);
8197  os->String().ReplaceAll(ocv.Data(), "");
8198  }
8199  }
8200  if (!os->String().IsNull()) {
8201  if (!optls) optls = new TList;
8202  optls->Add(new TObjString(os->String().Data()));
8203  }
8204  if (optls) optls->SetOwner(kTRUE);
8205  } else {
8206  // Notify
8207  Warning("EnablePackage", "remote server does not support options: ignoring the option string");
8208  }
8209  }
8210  // Run
8211  Int_t rc = EnablePackage(package, optls, notOnClient, workers);
8212  // Clean up
8213  SafeDelete(optls);
8214  // Done
8215  return rc;
8216 }
8217 
8218 ////////////////////////////////////////////////////////////////////////////////
8219 /// Enable specified package. Executes the PROOF-INF/BUILD.sh
8220 /// script if it exists followed by the PROOF-INF/SETUP.C script.
8221 /// In case notOnClient = true, don't enable the package on the client.
8222 /// The default is to enable packages also on the client.
8223 /// It is is possible to specify a list of objects to be passed to the SETUP
8224 /// functions via 'loadopts'; the objects must be streamable.
8225 /// Returns 0 in case of success and -1 in case of error.
8226 
8227 Int_t TProof::EnablePackage(const char *package, TList *loadopts,
8228  Bool_t notOnClient, TList *workers)
8229 {
8230  if (!IsValid()) return -1;
8231 
8232  if (!package || !package[0]) {
8233  Error("EnablePackage", "need to specify a package name");
8234  return -1;
8235  }
8236 
8237  // if name, erroneously, is a par pathname strip off .par and path
8238  TString pac = package;
8239  if (pac.EndsWith(".par"))
8240  pac.Remove(pac.Length()-4);
8241  pac = gSystem->BaseName(pac);
8242 
8243  EBuildPackageOpt opt = kBuildAll;
8244  if (notOnClient)
8245  opt = kDontBuildOnClient;
8246 
8247  // Get check version option; user settings have priority
8248  Int_t chkveropt = TPackMgr::kCheckROOT;
8249  TString ocv = gEnv->GetValue("Proof.Package.CheckVersion", "");
8250  if (!ocv.IsNull()) {
8251  if (ocv == "off" || ocv == "0")
8252  chkveropt = (Int_t) TPackMgr::kDontCheck;
8253  else if (ocv == "on" || ocv == "1")
8254  chkveropt = (Int_t) TPackMgr::kCheckROOT;
8255  else
8256  Warning("EnablePackage", "'checkversion' option unknown from rootrc: '%s' - ignored", ocv.Data());
8257  }
8258  if (loadopts) {
8259  TParameter<Int_t> *pcv = (TParameter<Int_t> *) loadopts->FindObject("PROOF_Package_CheckVersion");
8260  if (pcv) {
8261  chkveropt = pcv->GetVal();
8262  loadopts->Remove(pcv);
8263  delete pcv;
8264  }
8265  }
8266  if (gDebug > 0)
8267  Info("EnablePackage", "using check version option: %d", chkveropt);
8268 
8269  if (BuildPackage(pac, opt, chkveropt, workers) == -1)
8270  return -1;
8271 
8272  TList *optls = (loadopts && loadopts->GetSize() > 0) ? loadopts : 0;
8273  if (optls && fProtocol <= 28) {
8274  Warning("EnablePackage", "remote server does not support options: ignoring the option list");
8275  optls = 0;
8276  }
8277 
8278  if (LoadPackage(pac, notOnClient, optls, workers) == -1)
8279  return -1;
8280 
8281  // Record the information for later usage (simulation of dynamic start on PROOF-Lite)
8282  if (!fEnabledPackagesOnCluster) {
8283  fEnabledPackagesOnCluster = new TList;
8284  fEnabledPackagesOnCluster->SetOwner();
8285  }
8286  if (!fEnabledPackagesOnCluster->FindObject(pac)) {
8287  TPair *pck = (optls && optls->GetSize() > 0) ? new TPair(new TObjString(pac), optls->Clone())
8288  : new TPair(new TObjString(pac), 0);
8289  fEnabledPackagesOnCluster->Add(pck);
8290  }
8291 
8292  return 0;
8293 }
8294 
8295 ////////////////////////////////////////////////////////////////////////////////
8296 /// Download a PROOF archive (PAR file) from the master package repository.
8297 /// The PAR file is downloaded in the current directory or in the directory
8298 /// specified by 'dstdir'. If a package with the same name already exists
8299 /// at destination, a check on the MD5 sum is done and the user warned or
8300 /// prompted for action, depending is the file is equal or different.
8301 /// Returns 0 in case of success and -1 in case of error.
8302 
8303 Int_t TProof::DownloadPackage(const char *pack, const char *dstdir)
8304 {
8305  if (!fManager || !(fManager->IsValid())) {
8306  Error("DownloadPackage", "the manager is undefined!");
8307  return -1;
8308  }
8309 
8310  // Create the default source and destination paths
8311  TString parname(gSystem->BaseName(pack)), src, dst;
8312  if (!parname.EndsWith(".par")) parname += ".par";
8313  src.Form("packages/%s", parname.Data());
8314  if (!dstdir || strlen(dstdir) <= 0) {
8315  dst.Form("./%s", parname.Data());
8316  } else {
8317  // Check the destination directory
8318  FileStat_t st;
8319  if (gSystem->GetPathInfo(dstdir, st) != 0) {
8320  // Directory does not exit: create it
8321  if (gSystem->mkdir(dstdir, kTRUE) != 0) {
8322  Error("DownloadPackage",
8323  "could not create the destination directory '%s' (errno: %d)",
8324  dstdir, TSystem::GetErrno());
8325  return -1;
8326  }
8327  } else if (!R_ISDIR(st.fMode) && !R_ISLNK(st.fMode)) {
8328  Error("DownloadPackage",
8329  "destination path '%s' exist but is not a directory!", dstdir);
8330  return -1;
8331  }
8332  dst.Form("%s/%s", dstdir, parname.Data());
8333  }
8334 
8335  // Make sure the source file exists
8336  FileStat_t stsrc;
8337  RedirectHandle_t rh;
8338  if (gSystem->RedirectOutput(fLogFileName, "a", &rh) != 0)
8339  Warning("DownloadPackage", "problems redirecting output to '%s'", fLogFileName.Data());
8340  Int_t rc = fManager->Stat(src, stsrc);
8341  if (gSystem->RedirectOutput(0, 0, &rh) != 0)
8342  Warning("DownloadPackage", "problems restoring output");
8343  if (rc != 0) {
8344  // Check if there is another possible source
8345  ShowPackages(kFALSE, kTRUE);
8346  TMacro *mp = GetLastLog();
8347  if (mp) {
8348  // Look for global directories
8349  Bool_t isGlobal = kFALSE;
8350  TIter nxl(mp->GetListOfLines());
8351  TObjString *os = 0;
8352  TString globaldir;
8353  while ((os = (TObjString *) nxl())) {
8354  TString s(os->GetName());
8355  if (s.Contains("*** Global Package cache")) {
8356  // Get the directory
8357  s.Remove(0, s.Last(':') + 1);
8358  s.Remove(s.Last(' '));
8359  globaldir = s;
8360  isGlobal = kTRUE;
8361  } else if (s.Contains("*** Package cache")) {
8362  isGlobal = kFALSE;
8363  globaldir = "";
8364  }
8365  // Check for the package
8366  if (isGlobal && s.Contains(parname)) {
8367  src.Form("%s/%s", globaldir.Data(), parname.Data());
8368  break;
8369  }
8370  }
8371  // Cleanup
8372  delete mp;
8373  }
8374  }
8375 
8376  // Do it via the manager
8377  if (fManager->GetFile(src, dst, "silent") != 0) {
8378  Error("DownloadPackage", "problems downloading '%s' (src:%s, dst:%s)",
8379  pack, src.Data(), dst.Data());
8380  return -1;
8381  } else {
8382  Info("DownloadPackage", "'%s' cross-checked against master repository (local path: %s)",
8383  pack, dst.Data());
8384  }
8385  // Done
8386  return 0;
8387 }
8388 
8389 ////////////////////////////////////////////////////////////////////////////////
8390 /// Upload a PROOF archive (PAR file). A PAR file is a compressed
8391 /// tar file with one special additional directory, PROOF-INF
8392 /// (blatantly copied from Java's jar format). It must have the extension
8393 /// .par. A PAR file can be directly a binary or a source with a build
8394 /// procedure. In the PROOF-INF directory there can be a build script:
8395 /// BUILD.sh to be called to build the package, in case of a binary PAR
8396 /// file don't specify a build script or make it a no-op. Then there is
8397 /// SETUP.C which sets the right environment variables to use the package,
8398 /// like LD_LIBRARY_PATH, etc.
8399 /// The 'opt' allows to specify whether the .PAR should be just unpacked
8400 /// in the existing dir (opt = kUntar, default) or a remove of the existing
8401 /// directory should be executed (opt = kRemoveOld), so triggering a full
8402 /// re-build. The option if effective only for PROOF protocol > 8 .
8403 /// The lab 'dirlab' (e.g. 'G0') indicates that the package is to uploaded to
8404 /// an alternative global directory for global usage. This may require special
8405 /// privileges.
8406 /// If download is kTRUE and the package is not found locally, then it is downloaded
8407 /// from the master repository.
8408 /// Returns 0 in case of success and -1 in case of error.
8409 
8410 Int_t TProof::UploadPackage(const char *pack, EUploadPackageOpt opt,
8411  TList *workers)
8412 {
8413  if (!IsValid()) return -1;
8414 
8415  // Remote PAR ?
8416  TFile::EFileType ft = TFile::GetType(pack);
8417  Bool_t remotepar = (ft == TFile::kWeb || ft == TFile::kNet) ? kTRUE : kFALSE;
8418 
8419  TString par(pack), base, name;
8420  if (par.EndsWith(".par")) {
8421  base = gSystem->BaseName(par);
8422  name = base(0, base.Length() - strlen(".par"));
8423  } else {
8424  name = gSystem->BaseName(par);
8425  base.Form("%s.par", name.Data());
8426  par += ".par";
8427  }
8428 
8429  // Default location is the local working dir; then the package dir
8430  gSystem->ExpandPathName(par);
8431  if (gSystem->AccessPathName(par, kReadPermission)) {
8432  Int_t xrc = -1;
8433  if (!remotepar) xrc = TPackMgr::FindParPath(fPackMgr, name, par);
8434  if (xrc == 0) {
8435  // Package is in the global dirs
8436  if (gDebug > 0)
8437  Info("UploadPackage", "global package found (%s): no upload needed",
8438  par.Data());
8439  return 0;
8440  } else if (xrc < 0) {
8441  Error("UploadPackage", "PAR file '%s' not found", par.Data());
8442  return -1;
8443  }
8444  }
8445 
8446  // Strategy:
8447  // On the client:
8448  // get md5 of package and check if it is different
8449  // from the one stored in the local package directory. If it is lock
8450  // the package directory and copy the package, unlock the directory.
8451  // On the masters:
8452  // get md5 of package and check if it is different from the
8453  // one stored on the remote node. If it is different lock the remote
8454  // package directory and use TFTP or SendFile to ftp the package to the
8455  // remote node, unlock the directory.
8456 
8457 
8458  if (TestBit(TProof::kIsClient)) {
8459  Bool_t rmold = (opt == TProof::kRemoveOld) ? kTRUE : kFALSE;
8460  if (fPackMgr->Install(par, rmold) < 0) {
8461  Error("UploadPackage", "installing '%s' failed", gSystem->BaseName(par));
8462  return -1;
8463  }
8464  }
8465 
8466  // Nothing more to do if we are a Lite-session
8467  if (IsLite()) return 0;
8468 
8469  TMD5 *md5 = fPackMgr->ReadMD5(name);
8470 
8471  TString smsg;
8472  if (remotepar && GetRemoteProtocol() > 36) {
8473  smsg.Form("+%s", par.Data());
8474  } else {
8475  smsg.Form("+%s", base.Data());
8476  }
8477 
8478  TMessage mess(kPROOF_CHECKFILE);
8479  mess << smsg << (*md5);
8480  TMessage mess2(kPROOF_CHECKFILE);
8481  smsg.Replace(0, 1, "-");
8482  mess2 << smsg << (*md5);
8483  TMessage mess3(kPROOF_CHECKFILE);
8484  smsg.Replace(0, 1, "=");
8485  mess3 << smsg << (*md5);
8486 
8487  delete md5;
8488 
8489  if (fProtocol > 8) {
8490  // Send also the option
8491  mess << (UInt_t) opt;
8492  mess2 << (UInt_t) opt;
8493  mess3 << (UInt_t) opt;
8494  }
8495 
8496  // Loop over all slaves with unique fs image, or to a selected
8497  // list of workers, if specified
8498  if (!workers)
8499  workers = fUniqueSlaves;
8500  TIter next(workers);
8501  TSlave *sl = 0;
8502  while ((sl = (TSlave *) next())) {
8503  if (!sl->IsValid())
8504  continue;
8505 
8506  sl->GetSocket()->Send(mess);
8507 
8508  fCheckFileStatus = 0;
8509  Collect(sl, fCollectTimeout, kPROOF_CHECKFILE);
8510  if (fCheckFileStatus == 0) {
8511 
8512  if (fProtocol > 5) {
8513  // remote directory is locked, upload file over the open channel
8514  smsg.Form("%s/%s/%s", sl->GetProofWorkDir(), kPROOF_PackDir, base.Data());
8515  if (SendFile(par, (kBinary | kForce | kCpBin | kForward), smsg.Data(), sl) < 0) {
8516  Error("UploadPackage", "%s: problems uploading file %s",
8517  sl->GetOrdinal(), par.Data());
8518  return -1;
8519  }
8520  } else {
8521  // old servers receive it via TFTP
8522  TFTP ftp(TString("root://")+sl->GetName(), 1);
8523  if (!ftp.IsZombie()) {
8524  smsg.Form("%s/%s", sl->GetProofWorkDir(), kPROOF_PackDir);
8525  ftp.cd(smsg.Data());
8526  ftp.put(par, base.Data());
8527  }
8528  }
8529 
8530  // install package and unlock dir
8531  sl->GetSocket()->Send(mess2);
8532  fCheckFileStatus = 0;
8533  Collect(sl, fCollectTimeout, kPROOF_CHECKFILE);
8534  if (fCheckFileStatus == 0) {
8535  Error("UploadPackage", "%s: unpacking of package %s failed",
8536  sl->GetOrdinal(), base.Data());
8537  return -1;
8538  }
8539  }
8540  }
8541 
8542  // loop over all other master nodes
8543  TIter nextmaster(fNonUniqueMasters);
8544  TSlave *ma;
8545  while ((ma = (TSlave *) nextmaster())) {
8546  if (!ma->IsValid())
8547  continue;
8548 
8549  ma->GetSocket()->Send(mess3);
8550 
8551  fCheckFileStatus = 0;
8552  Collect(ma, fCollectTimeout, kPROOF_CHECKFILE);
8553  if (fCheckFileStatus == 0) {
8554  // error -> package should have been found
8555  Error("UploadPackage", "package %s did not exist on submaster %s",
8556  base.Data(), ma->GetOrdinal());
8557  return -1;
8558  }
8559  }
8560 
8561  return 0;
8562 }
8563 
8564 
8565 ////////////////////////////////////////////////////////////////////////////////
8566 /// Make sure that the directory path contained by macro is in the macro path
8567 
8568 void TProof::AssertMacroPath(const char *macro)
8569 {
8570  static TString macrop(gROOT->GetMacroPath());
8571  if (macro && strlen(macro) > 0) {
8572  TString dirn(gSystem->DirName(macro));
8573  if (!macrop.Contains(dirn)) {
8574  macrop += TString::Format("%s:", dirn.Data());
8575  gROOT->SetMacroPath(macrop);
8576  }
8577  }
8578 }
8579 
8580 
8581 ////////////////////////////////////////////////////////////////////////////////
8582 /// Load the specified macro on master, workers and, if notOnClient is
8583 /// kFALSE, on the client. The macro file is uploaded if new or updated.
8584 /// Additional files to be uploaded (or updated, if needed) can be specified
8585 /// after a comma, e.g. "mymacro.C+,thisheader.h,thatheader.h".
8586 /// If existing in the same directory, a header basename(macro).h or .hh, is also
8587 /// uploaded.
8588 /// The default is to load the macro also on the client; notOnClient can be used
8589 /// to avoid loading on the client.
8590 /// On masters, if uniqueWorkers is kTRUE, the macro is loaded on unique workers
8591 /// only, and collection is not done; if uniqueWorkers is kFALSE, collection
8592 /// from the previous request is done, and broadcasting + collection from the
8593 /// other workers is done.
8594 /// The wrks arg can be used on the master to limit the set of workers.
8595 /// Returns 0 in case of success and -1 in case of error.
8596 
8597 Int_t TProof::Load(const char *macro, Bool_t notOnClient, Bool_t uniqueWorkers,
8598  TList *wrks)
8599 {
8600  if (!IsValid()) return -1;
8601 
8602  if (!macro || !macro[0]) {
8603  Error("Load", "need to specify a macro name");
8604  return -1;
8605  }
8606 
8607  // Make sure the path is in the macro path
8608  TProof::AssertMacroPath(macro);
8609 
8610  if (TestBit(TProof::kIsClient) && !wrks) {
8611 
8612  // Extract the file implementation name first
8613  TString addsname, implname = macro;
8614  Ssiz_t icom = implname.Index(",");
8615  if (icom != kNPOS) {
8616  addsname = implname(icom + 1, implname.Length());
8617  implname.Remove(icom);
8618  }
8619  TString basemacro = gSystem->BaseName(implname), mainmacro(implname);
8620  TString bmsg(basemacro), acmode, args, io;
8621  implname = gSystem->SplitAclicMode(implname, acmode, args, io);
8622 
8623  // Macro names must have a standard format
8624  Int_t dot = implname.Last('.');
8625  if (dot == kNPOS) {
8626  Info("Load", "macro '%s' does not contain a '.': do nothing", macro);
8627  return -1;
8628  }
8629 
8630  // Is there any associated header file
8631  Bool_t hasHeader = kTRUE;
8632  TString headname = implname;
8633  headname.Remove(dot);
8634  headname += ".h";
8635  if (gSystem->AccessPathName(headname, kReadPermission)) {
8636  TString h = headname;
8637  headname.Remove(dot);
8638  headname += ".hh";
8639  if (gSystem->AccessPathName(headname, kReadPermission)) {
8640  hasHeader = kFALSE;
8641  if (gDebug > 0)
8642  Info("Load", "no associated header file found: tried: %s %s",
8643  h.Data(), headname.Data());
8644  }
8645  }
8646 
8647  // Is there any additional file ?
8648  TString addincs;
8649  TList addfiles;
8650  if (!addsname.IsNull()) {
8651  TString fn;
8652  Int_t from = 0;
8653  while (addsname.Tokenize(fn, from, ",")) {
8654  if (gSystem->AccessPathName(fn, kReadPermission)) {
8655  Error("Load", "additional file '%s' not found", fn.Data());
8656  return -1;
8657  }
8658  // Create the additional include statement
8659  if (!notOnClient) {
8660  TString dirn(gSystem->DirName(fn));
8661  if (addincs.IsNull()) {
8662  addincs.Form("-I%s", dirn.Data());
8663  } else if (!addincs.Contains(dirn)) {
8664  addincs += TString::Format(" -I%s", dirn.Data());
8665  }
8666  }
8667  // Remember these files ...
8668  addfiles.Add(new TObjString(fn));
8669  }
8670  }
8671 
8672  // Send files now; the md5 check is run here; see SendFile for more
8673  // details.
8674  if (SendFile(implname, kAscii | kForward , "cache") == -1) {
8675  Error("Load", "problems sending implementation file %s", implname.Data());
8676  return -1;
8677  }
8678  if (hasHeader)
8679  if (SendFile(headname, kAscii | kForward , "cache") == -1) {
8680  Error("Load", "problems sending header file %s", headname.Data());
8681  return -1;
8682  }
8683  // Additional files
8684  if (addfiles.GetSize() > 0) {
8685  TIter nxfn(&addfiles);
8686  TObjString *os = 0;
8687  while ((os = (TObjString *) nxfn())) {
8688  // These files need to be available everywhere, cache and sandbox
8689  if (SendFile(os->GetName(), kAscii | kForward, "cache") == -1) {
8690  Error("Load", "problems sending additional file %s", os->GetName());
8691  return -1;
8692  }
8693  // Add the base names to the message broadcasted
8694  bmsg += TString::Format(",%s", gSystem->BaseName(os->GetName()));
8695  }
8696  addfiles.SetOwner(kTRUE);
8697  }
8698 
8699  // The files are now on the workers: now we send the loading request
8700  TMessage mess(kPROOF_CACHE);
8701  if (GetRemoteProtocol() < 34) {
8702  mess << Int_t(kLoadMacro) << basemacro;
8703  // This may be needed
8704  AddIncludePath("../../cache");
8705  } else {
8706  mess << Int_t(kLoadMacro) << bmsg;
8707  }
8708  Broadcast(mess, kActive);
8709 
8710  // Load locally, if required
8711  if (!notOnClient) {
8712  // Mofify the include path
8713  TString oldincs = gSystem->GetIncludePath();
8714  if (!addincs.IsNull()) gSystem->AddIncludePath(addincs);
8715 
8716  // By first forwarding the load command to the master and workers
8717  // and only then loading locally we load/build in parallel
8718  gROOT->ProcessLine(TString::Format(".L %s", mainmacro.Data()));
8719 
8720  // Restore include path
8721  if (!addincs.IsNull()) gSystem->SetIncludePath(oldincs);
8722 
8723  // Update the macro path
8724  TString mp(TROOT::GetMacroPath());
8725  TString np(gSystem->DirName(macro));
8726  if (!np.IsNull()) {
8727  np += ":";
8728  if (!mp.BeginsWith(np) && !mp.Contains(":"+np)) {
8729  Int_t ip = (mp.BeginsWith(".:")) ? 2 : 0;
8730  mp.Insert(ip, np);
8731  TROOT::SetMacroPath(mp);
8732  if (gDebug > 0)
8733  Info("Load", "macro path set to '%s'", TROOT::GetMacroPath());
8734  }
8735  }
8736  }
8737 
8738  // Wait for master and workers to be done
8739  Collect(kActive);
8740 
8741  if (IsLite()) {
8742  PDB(kGlobal, 1) Info("Load", "adding loaded macro: %s", macro);
8743  if (!fLoadedMacros) {
8744  fLoadedMacros = new TList();
8745  fLoadedMacros->SetOwner();
8746  }
8747  // if wrks is specified the macro should already be loaded on the master.
8748  fLoadedMacros->Add(new TObjString(macro));
8749  }
8750 
8751  } else {
8752  // On master
8753 
8754  // The files are now on the workers: now we send the loading request first
8755  // to the unique workers, so that the eventual compilation occurs only once.
8756  TString basemacro = gSystem->BaseName(macro);
8757  TMessage mess(kPROOF_CACHE);
8758 
8759  if (uniqueWorkers) {
8760  mess << Int_t(kLoadMacro) << basemacro;
8761  if (wrks) {
8762  Broadcast(mess, wrks);
8763  Collect(wrks);
8764  } else {
8765  Broadcast(mess, kUnique);
8766  }
8767  } else {
8768  // Wait for the result of the previous sending
8769  Collect(kUnique);
8770 
8771  // We then send a tuned loading request to the other workers
8772  TList others;
8773  TSlave *wrk = 0;
8774  TIter nxw(fActiveSlaves);
8775  while ((wrk = (TSlave *)nxw())) {
8776  if (!fUniqueSlaves->FindObject(wrk)) {
8777  others.Add(wrk);
8778  }
8779  }
8780 
8781  // Do not force compilation, if it was requested
8782  Int_t ld = basemacro.Last('.');
8783  if (ld != kNPOS) {
8784  Int_t lpp = basemacro.Index("++", ld);
8785  if (lpp != kNPOS) basemacro.Replace(lpp, 2, "+");
8786  }
8787  mess << Int_t(kLoadMacro) << basemacro;
8788  Broadcast(mess, &others);
8789  Collect(&others);
8790  }
8791 
8792  PDB(kGlobal, 1) Info("Load", "adding loaded macro: %s", macro);
8793  if (!fLoadedMacros) {
8794  fLoadedMacros = new TList();
8795  fLoadedMacros->SetOwner();
8796  }
8797  // if wrks is specified the macro should already be loaded on the master.
8798  if (!wrks)
8799  fLoadedMacros->Add(new TObjString(macro));
8800  }
8801 
8802  // Done
8803  return 0;
8804 }
8805 
8806 ////////////////////////////////////////////////////////////////////////////////
8807 /// Add 'libpath' to the lib path search.
8808 /// Multiple paths can be specified at once separating them with a comma or
8809 /// a blank.
8810 /// Return 0 on success, -1 otherwise
8811 
8812 Int_t TProof::AddDynamicPath(const char *libpath, Bool_t onClient, TList *wrks,
8813  Bool_t doCollect)
8814 {
8815  if ((!libpath || !libpath[0])) {
8816  if (gDebug > 0)
8817  Info("AddDynamicPath", "list is empty - nothing to do");
8818  return 0;
8819  }
8820 
8821  // Do it also on clients, if required
8822  if (onClient)
8823  HandleLibIncPath("lib", kTRUE, libpath);
8824 
8825  TMessage m(kPROOF_LIB_INC_PATH);
8826  m << TString("lib") << (Bool_t)kTRUE;
8827 
8828  // Add paths
8829  if (libpath && strlen(libpath)) {
8830  m << TString(libpath);
8831  } else {
8832  m << TString("-");
8833  }
8834 
8835  // Tell the server to send back or not
8836  m << (Int_t)doCollect;
8837 
8838  // Forward the request
8839  if (wrks) {
8840  Broadcast(m, wrks);
8841  if (doCollect)
8842  Collect(wrks, fCollectTimeout);
8843  } else {
8844  Broadcast(m);
8845  Collect(kActive, fCollectTimeout);
8846  }
8847 
8848  return 0;
8849 }
8850 
8851 ////////////////////////////////////////////////////////////////////////////////
8852 /// Add 'incpath' to the inc path search.
8853 /// Multiple paths can be specified at once separating them with a comma or
8854 /// a blank.
8855 /// Return 0 on success, -1 otherwise
8856 
8857 Int_t TProof::AddIncludePath(const char *incpath, Bool_t onClient, TList *wrks,
8858  Bool_t doCollect)
8859 {
8860  if ((!incpath || !incpath[0])) {
8861  if (gDebug > 0)
8862  Info("AddIncludePath", "list is empty - nothing to do");
8863  return 0;
8864  }
8865 
8866  // Do it also on clients, if required
8867  if (onClient)
8868  HandleLibIncPath("inc", kTRUE, incpath);
8869 
8870  TMessage m(kPROOF_LIB_INC_PATH);
8871  m << TString("inc") << (Bool_t)kTRUE;
8872 
8873  // Add paths
8874  if (incpath && strlen(incpath)) {
8875  m << TString(incpath);
8876  } else {
8877  m << TString("-");
8878  }
8879 
8880  // Tell the server to send back or not
8881  m << (Int_t)doCollect;
8882 
8883  // Forward the request
8884  if (wrks) {
8885  Broadcast(m, wrks);
8886  if (doCollect)
8887  Collect(wrks, fCollectTimeout);
8888  } else {
8889  Broadcast(m);
8890  Collect(kActive, fCollectTimeout);
8891  }
8892 
8893  return 0;
8894 }
8895 
8896 ////////////////////////////////////////////////////////////////////////////////
8897 /// Remove 'libpath' from the lib path search.
8898 /// Multiple paths can be specified at once separating them with a comma or
8899 /// a blank.
8900 /// Return 0 on success, -1 otherwise
8901 
8902 Int_t TProof::RemoveDynamicPath(const char *libpath, Bool_t onClient)
8903 {
8904  if ((!libpath || !libpath[0])) {
8905  if (gDebug > 0)
8906  Info("RemoveDynamicPath", "list is empty - nothing to do");
8907  return 0;
8908  }
8909 
8910  // Do it also on clients, if required
8911  if (onClient)
8912  HandleLibIncPath("lib", kFALSE, libpath);
8913 
8914  TMessage m(kPROOF_LIB_INC_PATH);
8915  m << TString("lib") <<(Bool_t)kFALSE;
8916 
8917  // Add paths
8918  if (libpath && strlen(libpath))
8919  m << TString(libpath);
8920  else
8921  m << TString("-");
8922 
8923  // Forward the request
8924  Broadcast(m);
8925  Collect(kActive, fCollectTimeout);
8926 
8927  return 0;
8928 }
8929 
8930 ////////////////////////////////////////////////////////////////////////////////
8931 /// Remove 'incpath' from the inc path search.
8932 /// Multiple paths can be specified at once separating them with a comma or
8933 /// a blank.
8934 /// Return 0 on success, -1 otherwise
8935 
8936 Int_t TProof::RemoveIncludePath(const char *incpath, Bool_t onClient)
8937 {
8938  if ((!incpath || !incpath[0])) {
8939  if (gDebug > 0)
8940  Info("RemoveIncludePath", "list is empty - nothing to do");
8941  return 0;
8942  }
8943 
8944  // Do it also on clients, if required
8945  if (onClient)
8946  HandleLibIncPath("in", kFALSE, incpath);
8947 
8948  TMessage m(kPROOF_LIB_INC_PATH);
8949  m << TString("inc") << (Bool_t)kFALSE;
8950 
8951  // Add paths
8952  if (incpath && strlen(incpath))
8953  m << TString(incpath);
8954  else
8955  m << TString("-");
8956 
8957  // Forward the request
8958  Broadcast(m);
8959  Collect(kActive, fCollectTimeout);
8960 
8961  return 0;
8962 }
8963 
8964 ////////////////////////////////////////////////////////////////////////////////
8965 /// Handle lib, inc search paths modification request
8966 
8967 void TProof::HandleLibIncPath(const char *what, Bool_t add, const char *dirs)
8968 {
8969  TString type(what);
8970  TString path(dirs);
8971 
8972  // Check type of action
8973  if ((type != "lib") && (type != "inc")) {
8974  Error("HandleLibIncPath","unknown action type: %s - protocol error?", type.Data());
8975  return;
8976  }
8977 
8978  // Separators can be either commas or blanks
8979  path.ReplaceAll(","," ");
8980 
8981  // Decompose lists
8982  TObjArray *op = 0;
8983  if (path.Length() > 0 && path != "-") {
8984  if (!(op = path.Tokenize(" "))) {
8985  Warning("HandleLibIncPath","decomposing path %s", path.Data());
8986  return;
8987  }
8988  }
8989 
8990  if (add) {
8991 
8992  if (type == "lib") {
8993 
8994  // Add libs
8995  TIter nxl(op, kIterBackward);
8996  TObjString *lib = 0;
8997  while ((lib = (TObjString *) nxl())) {
8998  // Expand path
8999  TString xlib = lib->GetName();
9000  gSystem->ExpandPathName(xlib);
9001  // Add to the dynamic lib search path if it exists and can be read
9002  if (!gSystem->AccessPathName(xlib, kReadPermission)) {
9003  TString newlibpath = gSystem->GetDynamicPath();
9004  // In the first position after the working dir
9005  Int_t pos = 0;
9006  if (newlibpath.BeginsWith(".:"))
9007  pos = 2;
9008  if (newlibpath.Index(xlib) == kNPOS) {
9009  newlibpath.Insert(pos,TString::Format("%s:", xlib.Data()));
9010  gSystem->SetDynamicPath(newlibpath);
9011  }
9012  } else {
9013  if (gDebug > 0)
9014  Info("HandleLibIncPath",
9015  "libpath %s does not exist or cannot be read - not added", xlib.Data());
9016  }
9017  }
9018 
9019  } else {
9020 
9021  // Add incs
9022  TIter nxi(op);
9023  TObjString *inc = 0;
9024  while ((inc = (TObjString *) nxi())) {
9025  // Expand path
9026  TString xinc = inc->GetName();
9027  gSystem->ExpandPathName(xinc);
9028  // Add to the dynamic lib search path if it exists and can be read
9029  if (!gSystem->AccessPathName(xinc, kReadPermission)) {
9030  TString curincpath = gSystem->GetIncludePath();
9031  if (curincpath.Index(xinc) == kNPOS)
9032  gSystem->AddIncludePath(TString::Format("-I%s", xinc.Data()));
9033  } else
9034  if (gDebug > 0)
9035  Info("HandleLibIncPath",
9036  "incpath %s does not exist or cannot be read - not added", xinc.Data());
9037  }
9038  }
9039 
9040 
9041  } else {
9042 
9043  if (type == "lib") {
9044 
9045  // Remove libs
9046  TIter nxl(op);
9047  TObjString *lib = 0;
9048  while ((lib = (TObjString *) nxl())) {
9049  // Expand path
9050  TString xlib = lib->GetName();
9051  gSystem->ExpandPathName(xlib);
9052  // Remove from the dynamic lib search path
9053  TString newlibpath = gSystem->GetDynamicPath();
9054  newlibpath.ReplaceAll(TString::Format("%s:", xlib.Data()),"");
9055  gSystem->SetDynamicPath(newlibpath);
9056  }
9057 
9058  } else {
9059 
9060  // Remove incs
9061  TIter nxi(op);
9062  TObjString *inc = 0;
9063  while ((inc = (TObjString *) nxi())) {
9064  TString newincpath = gSystem->GetIncludePath();
9065  newincpath.ReplaceAll(TString::Format("-I%s", inc->GetName()),"");
9066  // Remove the interpreter path (added anyhow internally)
9067  newincpath.ReplaceAll(gInterpreter->GetIncludePath(),"");
9068  gSystem->SetIncludePath(newincpath);
9069  }
9070  }
9071  }
9072 }
9073 
9074 ////////////////////////////////////////////////////////////////////////////////
9075 /// Get from the master the list of names of the packages available.
9076 
9077 TList *TProof::GetListOfPackages()
9078 {
9079  if (!IsValid())
9080  return (TList *)0;
9081 
9082  TMessage mess(kPROOF_CACHE);
9083  mess << Int_t(kListPackages);
9084  Broadcast(mess);
9085  Collect(kActive, fCollectTimeout);
9086 
9087  return fAvailablePackages;
9088 }
9089 
9090 ////////////////////////////////////////////////////////////////////////////////
9091 /// Get from the master the list of names of the packages enabled.
9092 
9093 TList *TProof::GetListOfEnabledPackages()
9094 {
9095  if (!IsValid())
9096  return (TList *)0;
9097 
9098  TMessage mess(kPROOF_CACHE);
9099  mess << Int_t(kListEnabledPackages);
9100  Broadcast(mess);
9101  Collect(kActive, fCollectTimeout);
9102 
9103  return fEnabledPackages;
9104 }
9105 
9106 ////////////////////////////////////////////////////////////////////////////////
9107 /// Print a progress bar on stderr. Used in batch mode.
9108 
9109 void TProof::PrintProgress(Long64_t total, Long64_t processed,
9110  Float_t procTime, Long64_t bytesread)
9111 {
9112  if (fPrintProgress) {
9113  Bool_t redirlog = fRedirLog;
9114  fRedirLog = kFALSE;
9115  // Call the external function
9116  (*fPrintProgress)(total, processed, procTime, bytesread);
9117  fRedirLog = redirlog;
9118  return;
9119  }
9120 
9121  fprintf(stderr, "[TProof::Progress] Total %lld events\t|", total);
9122 
9123  for (int l = 0; l < 20; l++) {
9124  if (total > 0) {
9125  if (l < 20*processed/total)
9126  fprintf(stderr, "=");
9127  else if (l == 20*processed/total)
9128  fprintf(stderr, ">");
9129  else if (l > 20*processed/total)
9130  fprintf(stderr, ".");
9131  } else
9132  fprintf(stderr, "=");
9133  }
9134  Float_t evtrti = (procTime > 0. && processed > 0) ? processed / procTime : -1.;
9135  Float_t mbsrti = (procTime > 0. && bytesread > 0) ? bytesread / procTime : -1.;
9136  TString sunit("B/s");
9137  if (evtrti > 0.) {
9138  Float_t remainingTime = (total >= processed) ? (total - processed) / evtrti : -1;
9139  if (mbsrti > 0.) {
9140  const Float_t toK = 1024., toM = 1048576., toG = 1073741824.;
9141  if (mbsrti >= toG) {
9142  mbsrti /= toG;
9143  sunit = "GB/s";
9144  } else if (mbsrti >= toM) {
9145  mbsrti /= toM;
9146  sunit = "MB/s";
9147  } else if (mbsrti >= toK) {
9148  mbsrti /= toK;
9149  sunit = "kB/s";
9150  }
9151  fprintf(stderr, "| %.02f %% [%.1f evts/s, %.1f %s, time left: %.1f s]\r",
9152  (total ? ((100.0*processed)/total) : 100.0), evtrti, mbsrti, sunit.Data(), remainingTime);
9153  } else {
9154  fprintf(stderr, "| %.02f %% [%.1f evts/s, time left: %.1f s]\r",
9155  (total ? ((100.0*processed)/total) : 100.0), evtrti, remainingTime);
9156  }
9157  } else {
9158  fprintf(stderr, "| %.02f %%\r",
9159  (total ? ((100.0*processed)/total) : 100.0));
9160  }
9161  if (processed >= total) {
9162  fprintf(stderr, "\n Query processing time: %.1f s\n", procTime);
9163  }
9164 }
9165 
9166 ////////////////////////////////////////////////////////////////////////////////
9167 /// Get query progress information. Connect a slot to this signal
9168 /// to track progress.
9169 
9170 void TProof::Progress(Long64_t total, Long64_t processed)
9171 {
9172  if (fPrintProgress) {
9173  // Call the external function
9174  return (*fPrintProgress)(total, processed, -1., -1);
9175  }
9176 
9177  PDB(kGlobal,1)
9178  Info("Progress","%2f (%lld/%lld)", 100.*processed/total, processed, total);
9179 
9180  if (gROOT->IsBatch()) {
9181  // Simple progress bar
9182  if (total > 0)
9183  PrintProgress(total, processed);
9184  } else {
9185  EmitVA("Progress(Long64_t,Long64_t)", 2, total, processed);
9186  }
9187 }
9188 
9189 ////////////////////////////////////////////////////////////////////////////////
9190 /// Get query progress information. Connect a slot to this signal
9191 /// to track progress.
9192 
9193 void TProof::Progress(Long64_t total, Long64_t processed, Long64_t bytesread,
9194  Float_t initTime, Float_t procTime,
9195  Float_t evtrti, Float_t mbrti)
9196 {
9197  PDB(kGlobal,1)
9198  Info("Progress","%lld %lld %lld %f %f %f %f", total, processed, bytesread,
9199  initTime, procTime, evtrti, mbrti);
9200 
9201  if (gROOT->IsBatch()) {
9202  // Simple progress bar
9203  if (total > 0)
9204  PrintProgress(total, processed, procTime, bytesread);
9205  } else {
9206  EmitVA("Progress(Long64_t,Long64_t,Long64_t,Float_t,Float_t,Float_t,Float_t)",
9207  7, total, processed, bytesread, initTime, procTime, evtrti, mbrti);
9208  }
9209 }
9210 
9211 ////////////////////////////////////////////////////////////////////////////////
9212 /// Get query progress information. Connect a slot to this signal
9213 /// to track progress.
9214 
9215 void TProof::Progress(Long64_t total, Long64_t processed, Long64_t bytesread,
9216  Float_t initTime, Float_t procTime,
9217  Float_t evtrti, Float_t mbrti, Int_t actw, Int_t tses, Float_t eses)
9218 {
9219  PDB(kGlobal,1)
9220  Info("Progress","%lld %lld %lld %f %f %f %f %d %f", total, processed, bytesread,
9221  initTime, procTime, evtrti, mbrti, actw, eses);
9222 
9223  if (gROOT->IsBatch()) {
9224  // Simple progress bar
9225  if (total > 0)
9226  PrintProgress(total, processed, procTime, bytesread);
9227  } else {
9228  EmitVA("Progress(Long64_t,Long64_t,Long64_t,Float_t,Float_t,Float_t,Float_t,Int_t,Int_t,Float_t)",
9229  10, total, processed, bytesread, initTime, procTime, evtrti, mbrti, actw, tses, eses);
9230  }
9231 }
9232 
9233 ////////////////////////////////////////////////////////////////////////////////
9234 /// Get list of feedback objects. Connect a slot to this signal
9235 /// to monitor the feedback object.
9236 
9237 void TProof::Feedback(TList *objs)
9238 {
9239  PDB(kGlobal,1)
9240  Info("Feedback","%d objects", objs->GetSize());
9241  PDB(kFeedback,1) {
9242  Info("Feedback","%d objects", objs->GetSize());
9243  objs->ls();
9244  }
9245 
9246  Emit("Feedback(TList *objs)", (Long_t) objs);
9247 }
9248 
9249 ////////////////////////////////////////////////////////////////////////////////
9250 /// Close progress dialog.
9251 
9252 void TProof::CloseProgressDialog()
9253 {
9254  PDB(kGlobal,1)
9255  Info("CloseProgressDialog",
9256  "called: have progress dialog: %d", fProgressDialogStarted);
9257 
9258  // Nothing to do if not there
9259  if (!fProgressDialogStarted)
9260  return;
9261 
9262  Emit("CloseProgressDialog()");
9263 }
9264 
9265 ////////////////////////////////////////////////////////////////////////////////
9266 /// Reset progress dialog.
9267 
9268 void TProof::ResetProgressDialog(const char *sel, Int_t sz, Long64_t fst,
9269  Long64_t ent)
9270 {
9271  PDB(kGlobal,1)
9272  Info("ResetProgressDialog","(%s,%d,%lld,%lld)", sel, sz, fst, ent);
9273 
9274  EmitVA("ResetProgressDialog(const char*,Int_t,Long64_t,Long64_t)",
9275  4, sel, sz, fst, ent);
9276 }
9277 
9278 ////////////////////////////////////////////////////////////////////////////////
9279 /// Send startup message.
9280 
9281 void TProof::StartupMessage(const char *msg, Bool_t st, Int_t done, Int_t total)
9282 {
9283  PDB(kGlobal,1)
9284  Info("StartupMessage","(%s,%d,%d,%d)", msg, st, done, total);
9285 
9286  EmitVA("StartupMessage(const char*,Bool_t,Int_t,Int_t)",
9287  4, msg, st, done, total);
9288 }
9289 
9290 ////////////////////////////////////////////////////////////////////////////////
9291 /// Send dataset preparation status.
9292 
9293 void TProof::DataSetStatus(const char *msg, Bool_t st, Int_t done, Int_t total)
9294 {
9295  PDB(kGlobal,1)
9296  Info("DataSetStatus","(%s,%d,%d,%d)", msg, st, done, total);
9297 
9298  EmitVA("DataSetStatus(const char*,Bool_t,Int_t,Int_t)",
9299  4, msg, st, done, total);
9300 }
9301 
9302 ////////////////////////////////////////////////////////////////////////////////
9303 /// Send or notify data set status
9304 
9305 void TProof::SendDataSetStatus(const char *action, UInt_t done,
9306  UInt_t tot, Bool_t st)
9307 {
9308  if (IsLite()) {
9309  if (tot) {
9310  TString type = "files";
9311  Int_t frac = (Int_t) (done*100.)/tot;
9312  char msg[512] = {0};
9313  if (frac >= 100) {
9314  snprintf(msg, 512, "%s: OK (%d %s) \n",
9315  action,tot, type.Data());
9316  } else {
9317  snprintf(msg, 512, "%s: %d out of %d (%d %%)\r",
9318  action, done, tot, frac);
9319  }
9320  if (fSync)
9321  fprintf(stderr,"%s", msg);
9322  else
9323  NotifyLogMsg(msg, 0);
9324  }
9325  return;
9326  }
9327 
9328  if (TestBit(TProof::kIsMaster)) {
9329  TMessage mess(kPROOF_DATASET_STATUS);
9330  mess << TString(action) << tot << done << st;
9331  gProofServ->GetSocket()->Send(mess);
9332  }
9333 }
9334 
9335 ////////////////////////////////////////////////////////////////////////////////
9336 /// Notify availability of a query result.
9337 
9338 void TProof::QueryResultReady(const char *ref)
9339 {
9340  PDB(kGlobal,1)
9341  Info("QueryResultReady","ref: %s", ref);
9342 
9343  Emit("QueryResultReady(const char*)",ref);
9344 }
9345 
9346 ////////////////////////////////////////////////////////////////////////////////
9347 /// Validate a TDSet.
9348 
9349 void TProof::ValidateDSet(TDSet *dset)
9350 {
9351  if (dset->ElementsValid()) return;
9352 
9353  TList nodes;
9354  nodes.SetOwner();
9355 
9356  TList slholder;
9357  slholder.SetOwner();
9358  TList elemholder;
9359  elemholder.SetOwner();
9360 
9361  // build nodelist with slaves and elements
9362  TIter nextSlave(GetListOfActiveSlaves());
9363  while (TSlave *sl = dynamic_cast<TSlave*>(nextSlave())) {
9364  TList *sllist = 0;
9365  TPair *p = dynamic_cast<TPair*>(nodes.FindObject(sl->GetName()));
9366  if (!p) {
9367  sllist = new TList;
9368  sllist->SetName(sl->GetName());
9369  slholder.Add(sllist);
9370  TList *elemlist = new TList;
9371  elemlist->SetName(TString(sl->GetName())+"_elem");
9372  elemholder.Add(elemlist);
9373  nodes.Add(new TPair(sllist, elemlist));
9374  } else {
9375  sllist = dynamic_cast<TList*>(p->Key());
9376  }
9377  if (sllist) sllist->Add(sl);
9378  }
9379 
9380  // add local elements to nodes
9381  TList nonLocal; // list of nonlocal elements
9382  // make two iterations - first add local elements - then distribute nonlocals
9383  for (Int_t i = 0; i < 2; i++) {
9384  Bool_t local = i>0?kFALSE:kTRUE;
9385  TIter nextElem(local ? dset->GetListOfElements() : &nonLocal);
9386  while (TDSetElement *elem = dynamic_cast<TDSetElement*>(nextElem())) {
9387  if (elem->GetValid()) continue;
9388  TPair *p = dynamic_cast<TPair*>(local?nodes.FindObject(TUrl(elem->GetFileName()).GetHost()):nodes.At(0));
9389  if (p) {
9390  TList *eli = dynamic_cast<TList*>(p->Value());
9391  TList *sli = dynamic_cast<TList*>(p->Key());
9392  if (eli && sli) {
9393  eli->Add(elem);
9394 
9395  // order list by elements/slave
9396  TPair *p2 = p;
9397  Bool_t stop = kFALSE;
9398  while (!stop) {
9399  TPair *p3 = dynamic_cast<TPair*>(nodes.After(p2->Key()));
9400  if (p3) {
9401  TList *p3v = dynamic_cast<TList*>(p3->Value());
9402  TList *p3k = dynamic_cast<TList*>(p3->Key());
9403  if (p3v && p3k) {
9404  Int_t nelem = p3v->GetSize();
9405  Int_t nsl = p3k->GetSize();
9406  if (nelem*sli->GetSize() < eli->GetSize()*nsl) p2 = p3;
9407  else stop = kTRUE;
9408  }
9409  } else {
9410  stop = kTRUE;
9411  }
9412  }
9413 
9414  if (p2!=p) {
9415  nodes.Remove(p->Key());
9416  nodes.AddAfter(p2->Key(), p);
9417  }
9418  } else {
9419  Warning("ValidateDSet", "invalid values from TPair! Protocol error?");
9420  continue;
9421  }
9422 
9423  } else {
9424  if (local) {
9425  nonLocal.Add(elem);
9426  } else {
9427  Warning("ValidateDSet", "no node to allocate TDSetElement to - ignoring");
9428  }
9429  }
9430  }
9431  }
9432 
9433  // send to slaves
9434  TList usedslaves;
9435  TIter nextNode(&nodes);
9436  SetDSet(dset); // set dset to be validated in Collect()
9437  while (TPair *node = dynamic_cast<TPair*>(nextNode())) {
9438  TList *slaves = dynamic_cast<TList*>(node->Key());
9439  TList *setelements = dynamic_cast<TList*>(node->Value());
9440  if (!slaves || !setelements) continue;
9441  // distribute elements over the slaves
9442  Int_t nslaves = slaves->GetSize();
9443  Int_t nelements = setelements->GetSize();
9444  for (Int_t i=0; i<nslaves; i++) {
9445 
9446  TDSet copyset(dset->GetType(), dset->GetObjName(),
9447  dset->GetDirectory());
9448  for (Int_t j = (i*nelements)/nslaves;
9449  j < ((i+1)*nelements)/nslaves;
9450  j++) {
9451  TDSetElement *elem =
9452  dynamic_cast<TDSetElement*>(setelements->At(j));
9453  if (elem) {
9454  copyset.Add(elem->GetFileName(), elem->GetObjName(),
9455  elem->GetDirectory(), elem->GetFirst(),
9456  elem->GetNum(), elem->GetMsd());
9457  }
9458  }
9459 
9460  if (copyset.GetListOfElements()->GetSize()>0) {
9461  TMessage mesg(kPROOF_VALIDATE_DSET);
9462  mesg << &copyset;
9463 
9464  TSlave *sl = dynamic_cast<TSlave*>(slaves->At(i));
9465  if (sl) {
9466  PDB(kGlobal,1) Info("ValidateDSet",
9467  "Sending TDSet with %d elements to slave %s"
9468  " to be validated",
9469  copyset.GetListOfElements()->GetSize(),
9470  sl->GetOrdinal());
9471  sl->GetSocket()->Send(mesg);
9472  usedslaves.Add(sl);
9473  }
9474  }
9475  }
9476  }
9477 
9478  PDB(kGlobal,1)
9479  Info("ValidateDSet","Calling Collect");
9480  Collect(&usedslaves);
9481  SetDSet(0);
9482 }
9483 
9484 ////////////////////////////////////////////////////////////////////////////////
9485 /// Add data objects that might be needed during the processing of
9486 /// the selector (see Process()). This object can be very large, so they
9487 /// are distributed in an optimized way using a dedicated file.
9488 /// If push is TRUE the input data are sent over even if no apparent change
9489 /// occured to the list.
9490 
9491 void TProof::AddInputData(TObject *obj, Bool_t push)
9492 {
9493  if (obj) {
9494  if (!fInputData) fInputData = new TList;
9495  if (!fInputData->FindObject(obj)) {
9496  fInputData->Add(obj);
9497  SetBit(TProof::kNewInputData);
9498  }
9499  }
9500  if (push) SetBit(TProof::kNewInputData);
9501 }
9502 
9503 ////////////////////////////////////////////////////////////////////////////////
9504 /// Remove obj form the input data list; if obj is null (default), clear the
9505 /// input data info.
9506 
9507 void TProof::ClearInputData(TObject *obj)
9508 {
9509  if (!obj) {
9510  if (fInputData) {
9511  fInputData->SetOwner(kTRUE);
9512  SafeDelete(fInputData);
9513  }
9514  ResetBit(TProof::kNewInputData);
9515 
9516  // Also remove any info about input data in the input list
9517  TObject *o = 0;
9518  TList *in = GetInputList();
9519  while ((o = GetInputList()->FindObject("PROOF_InputDataFile")))
9520  in->Remove(o);
9521  while ((o = GetInputList()->FindObject("PROOF_InputData")))
9522  in->Remove(o);
9523 
9524  // ... and reset the file
9525  fInputDataFile = "";
9526  gSystem->Unlink(kPROOF_InputDataFile);
9527 
9528  } else if (fInputData) {
9529  Int_t sz = fInputData->GetSize();
9530  while (fInputData->FindObject(obj))
9531  fInputData->Remove(obj);
9532  // Flag for update, if anything changed
9533  if (sz != fInputData->GetSize())
9534  SetBit(TProof::kNewInputData);
9535  }
9536 }
9537 
9538 ////////////////////////////////////////////////////////////////////////////////
9539 /// Remove obj 'name' form the input data list;
9540 
9541 void TProof::ClearInputData(const char *name)
9542 {
9543  TObject *obj = (fInputData && name) ? fInputData->FindObject(name) : 0;
9544  if (obj) ClearInputData(obj);
9545 }
9546 
9547 ////////////////////////////////////////////////////////////////////////////////
9548 /// Set the file to be used to optimally distribute the input data objects.
9549 /// If the file exists the object in the file are added to those in the
9550 /// fInputData list. If the file path is null, a default file will be created
9551 /// at the moment of sending the processing request with the content of
9552 /// the fInputData list. See also SendInputDataFile.
9553 
9554 void TProof::SetInputDataFile(const char *datafile)
9555 {
9556  if (datafile && strlen(datafile) > 0) {
9557  if (fInputDataFile != datafile && strcmp(datafile, kPROOF_InputDataFile))
9558  SetBit(TProof::kNewInputData);
9559  fInputDataFile = datafile;
9560  } else {
9561  if (!fInputDataFile.IsNull())
9562  SetBit(TProof::kNewInputData);
9563  fInputDataFile = "";
9564  }
9565  // Make sure that the chosen file is readable
9566  if (fInputDataFile != kPROOF_InputDataFile && !fInputDataFile.IsNull() &&
9567  gSystem->AccessPathName(fInputDataFile, kReadPermission)) {
9568  fInputDataFile = "";
9569  }
9570 }
9571 
9572 ////////////////////////////////////////////////////////////////////////////////
9573 /// Send the input data objects to the master; the objects are taken from the
9574 /// dedicated list and / or the specified file.
9575 /// If the fInputData is empty the specified file is sent over.
9576 /// If there is no specified file, a file named "inputdata.root" is created locally
9577 /// with the content of fInputData and sent over to the master.
9578 /// If both fInputData and the specified file are not empty, a copy of the file
9579 /// is made locally and augmented with the content of fInputData.
9580 
9581 void TProof::SendInputDataFile()
9582 {
9583  // Prepare the file
9584  TString dataFile;
9585  PrepareInputDataFile(dataFile);
9586 
9587  // Send it, if not empty
9588  if (dataFile.Length() > 0) {
9589 
9590  Info("SendInputDataFile", "broadcasting %s", dataFile.Data());
9591  BroadcastFile(dataFile.Data(), kBinary, "cache", kActive);
9592 
9593  // Set the name in the input list
9594  TString t = TString::Format("cache:%s", gSystem->BaseName(dataFile));
9595  AddInput(new TNamed("PROOF_InputDataFile", t.Data()));
9596  }
9597 }
9598 
9599 ////////////////////////////////////////////////////////////////////////////////
9600 /// Prepare the file with the input data objects to be sent the master; the
9601 /// objects are taken from the dedicated list and / or the specified file.
9602 /// If the fInputData is empty the specified file is sent over.
9603 /// If there is no specified file, a file named "inputdata.root" is created locally
9604 /// with the content of fInputData and sent over to the master.
9605 /// If both fInputData and the specified file are not empty, a copy of the file
9606 /// is made locally and augmented with the content of fInputData.
9607 
9608 void TProof::PrepareInputDataFile(TString &dataFile)
9609 {
9610  // Save info about new data for usage in this call;
9611  Bool_t newdata = TestBit(TProof::kNewInputData) ? kTRUE : kFALSE;
9612  // Next time we need some change
9613  ResetBit(TProof::kNewInputData);
9614 
9615  // Check the list
9616  Bool_t list_ok = (fInputData && fInputData->GetSize() > 0) ? kTRUE : kFALSE;
9617  // Check the file
9618  Bool_t file_ok = kFALSE;
9619  if (fInputDataFile != kPROOF_InputDataFile && !fInputDataFile.IsNull() &&
9620  !gSystem->AccessPathName(fInputDataFile, kReadPermission)) {
9621  // It must contain something
9622  TFile *f = TFile::Open(fInputDataFile);
9623  if (f && f->GetListOfKeys() && f->GetListOfKeys()->GetSize() > 0)
9624  file_ok = kTRUE;
9625  }
9626 
9627  // Remove any info about input data in the input list
9628  TObject *o = 0;
9629  TList *in = GetInputList();
9630  while ((o = GetInputList()->FindObject("PROOF_InputDataFile")))
9631  in->Remove(o);
9632  while ((o = GetInputList()->FindObject("PROOF_InputData")))
9633  in->Remove(o);
9634 
9635  // We must have something to send
9636  dataFile = "";
9637  if (!list_ok && !file_ok) return;
9638 
9639  // Three cases:
9640  if (file_ok && !list_ok) {
9641  // Just send the file
9642  dataFile = fInputDataFile;
9643  } else if (!file_ok && list_ok) {
9644  fInputDataFile = kPROOF_InputDataFile;
9645  // Nothing to do, if no new data
9646  if (!newdata && !gSystem->AccessPathName(fInputDataFile)) return;
9647  // Create the file first
9648  TFile *f = TFile::Open(fInputDataFile, "RECREATE");
9649  if (f) {
9650  f->cd();
9651  TIter next(fInputData);
9652  TObject *obj;
9653  while ((obj = next())) {
9654  obj->Write(0, TObject::kSingleKey, 0);
9655  }
9656  f->Close();
9657  SafeDelete(f);
9658  } else {
9659  Error("PrepareInputDataFile", "could not (re-)create %s", fInputDataFile.Data());
9660  return;
9661  }
9662  dataFile = fInputDataFile;
9663  } else if (file_ok && list_ok) {
9664  dataFile = kPROOF_InputDataFile;
9665  // Create the file if not existing or there are new data
9666  if (newdata || gSystem->AccessPathName(dataFile)) {
9667  // Cleanup previous file if obsolete
9668  if (!gSystem->AccessPathName(dataFile))
9669  gSystem->Unlink(dataFile);
9670  if (dataFile != fInputDataFile) {
9671  // Make a local copy first
9672  if (gSystem->CopyFile(fInputDataFile, dataFile, kTRUE) != 0) {
9673  Error("PrepareInputDataFile", "could not make local copy of %s", fInputDataFile.Data());
9674  return;
9675  }
9676  }
9677  // Add the input data list
9678  TFile *f = TFile::Open(dataFile, "UPDATE");
9679  if (f) {
9680  f->cd();
9681  TIter next(fInputData);
9682  TObject *obj = 0;
9683  while ((obj = next())) {
9684  obj->Write(0, TObject::kSingleKey, 0);
9685  }
9686  f->Close();
9687  SafeDelete(f);
9688  } else {
9689  Error("PrepareInputDataFile", "could not open %s for updating", dataFile.Data());
9690  return;
9691  }
9692  }
9693  }
9694 
9695  // Done
9696  return;
9697 }
9698 
9699 ////////////////////////////////////////////////////////////////////////////////
9700 /// Add objects that might be needed during the processing of
9701 /// the selector (see Process()).
9702 
9703 void TProof::AddInput(TObject *obj)
9704 {
9705  if (fPlayer) fPlayer->AddInput(obj);
9706 }
9707 
9708 ////////////////////////////////////////////////////////////////////////////////
9709 /// Clear input object list.
9710 
9711 void TProof::ClearInput()
9712 {
9713  if (fPlayer) fPlayer->ClearInput();
9714 
9715  // the system feedback list is always in the input list
9716  AddInput(fFeedback);
9717 }
9718 
9719 ////////////////////////////////////////////////////////////////////////////////
9720 /// Get input list.
9721 
9722 TList *TProof::GetInputList()
9723 {
9724  return (fPlayer ? fPlayer->GetInputList() : (TList *)0);
9725 }
9726 
9727 ////////////////////////////////////////////////////////////////////////////////
9728 /// Get specified object that has been produced during the processing
9729 /// (see Process()).
9730 
9731 TObject *TProof::GetOutput(const char *name)
9732 {
9733 
9734  if (TestBit(TProof::kIsMaster))
9735  // Can be called by MarkBad on the master before the player is initialized
9736  return (fPlayer) ? fPlayer->GetOutput(name) : (TObject *)0;
9737 
9738  // This checks also associated output files
9739  return (GetOutputList()) ? GetOutputList()->FindObject(name) : (TObject *)0;
9740 }
9741 
9742 ////////////////////////////////////////////////////////////////////////////////
9743 /// Find object 'name' in list 'out' or in the files specified in there
9744 
9745 TObject *TProof::GetOutput(const char *name, TList *out)
9746 {
9747  TObject *o = 0;
9748  if (!name || (name && strlen(name) <= 0) ||
9749  !out || (out && out->GetSize() <= 0)) return o;
9750  if ((o = out->FindObject(name))) return o;
9751 
9752  // For the time being we always check for all the files; this may require
9753  // some caching
9754  TProofOutputFile *pf = 0;
9755  TIter nxo(out);
9756  while ((o = nxo())) {
9757  if ((pf = dynamic_cast<TProofOutputFile *> (o))) {
9758  TFile *f = 0;
9759  if (!(f = (TFile *) gROOT->GetListOfFiles()->FindObject(pf->GetOutputFileName()))) {
9760  TString fn = TString::Format("%s/%s", pf->GetDir(), pf->GetFileName());
9761  f = TFile::Open(fn.Data());
9762  if (!f || (f && f->IsZombie())) {
9763  ::Warning("TProof::GetOutput", "problems opening file %s", fn.Data());
9764  }
9765  }
9766  if (f && (o = f->Get(name))) return o;
9767  }
9768  }
9769 
9770  // Done, unsuccessfully
9771  return o;
9772 }
9773 
9774 ////////////////////////////////////////////////////////////////////////////////
9775 /// Get list with all object created during processing (see Process()).
9776 
9777 TList *TProof::GetOutputList()
9778 {
9779  if (fOutputList.GetSize() > 0) return &fOutputList;
9780  if (fPlayer) {
9781  fOutputList.AttachList(fPlayer->GetOutputList());
9782  return &fOutputList;
9783  }
9784  return (TList *)0;
9785 }
9786 
9787 ////////////////////////////////////////////////////////////////////////////////
9788 /// Set input list parameter. If the parameter is already
9789 /// set it will be set to the new value.
9790 
9791 void TProof::SetParameter(const char *par, const char *value)
9792 {
9793  if (!fPlayer) {
9794  Warning("SetParameter", "player undefined! Ignoring");
9795  return;
9796  }
9797 
9798  TList *il = fPlayer->GetInputList();
9799  TObject *item = il->FindObject(par);
9800  if (item) {
9801  il->Remove(item);
9802  delete item;
9803  }
9804  il->Add(new TNamed(par, value));
9805 }
9806 
9807 ////////////////////////////////////////////////////////////////////////////////
9808 /// Set an input list parameter.
9809 
9810 void TProof::SetParameter(const char *par, Int_t value)
9811 {
9812  if (!fPlayer) {
9813  Warning("SetParameter", "player undefined! Ignoring");
9814  return;
9815  }
9816 
9817  TList *il = fPlayer->GetInputList();
9818  TObject *item = il->FindObject(par);
9819  if (item) {
9820  il->Remove(item);
9821  delete item;
9822  }
9823  il->Add(new TParameter<Int_t>(par, value));
9824 }
9825 
9826 ////////////////////////////////////////////////////////////////////////////////
9827 /// Set an input list parameter.
9828 
9829 void TProof::SetParameter(const char *par, Long_t value)
9830 {
9831  if (!fPlayer) {
9832  Warning("SetParameter", "player undefined! Ignoring");
9833  return;
9834  }
9835 
9836  TList *il = fPlayer->GetInputList();
9837  TObject *item = il->FindObject(par);
9838  if (item) {
9839  il->Remove(item);
9840  delete item;
9841  }
9842  il->Add(new TParameter<Long_t>(par, value));
9843 }
9844 
9845 ////////////////////////////////////////////////////////////////////////////////
9846 /// Set an input list parameter.
9847 
9848 void TProof::SetParameter(const char *par, Long64_t value)
9849 {
9850  if (!fPlayer) {
9851  Warning("SetParameter", "player undefined! Ignoring");
9852  return;
9853  }
9854 
9855  TList *il = fPlayer->GetInputList();
9856  TObject *item = il->FindObject(par);
9857  if (item) {
9858  il->Remove(item);
9859  delete item;
9860  }
9861  il->Add(new TParameter<Long64_t>(par, value));
9862 }
9863 
9864 ////////////////////////////////////////////////////////////////////////////////
9865 /// Set an input list parameter.
9866 
9867 void TProof::SetParameter(const char *par, Double_t value)
9868 {
9869  if (!fPlayer) {
9870  Warning("SetParameter", "player undefined! Ignoring");
9871  return;
9872  }
9873 
9874  TList *il = fPlayer->GetInputList();
9875  TObject *item = il->FindObject(par);
9876  if (item) {
9877  il->Remove(item);
9878  delete item;
9879  }
9880  il->Add(new TParameter<Double_t>(par, value));
9881 }
9882 
9883 ////////////////////////////////////////////////////////////////////////////////
9884 /// Get specified parameter. A parameter set via SetParameter() is either
9885 /// a TParameter or a TNamed or 0 in case par is not defined.
9886 
9887 TObject *TProof::GetParameter(const char *par) const
9888 {
9889  if (!fPlayer) {
9890  Warning("GetParameter", "player undefined! Ignoring");
9891  return (TObject *)0;
9892  }
9893 
9894  TList *il = fPlayer->GetInputList();
9895  return il->FindObject(par);
9896 }
9897 
9898 ////////////////////////////////////////////////////////////////////////////////
9899 /// Delete the input list parameters specified by a wildcard (e.g. PROOF_*)
9900 /// or exact name (e.g. PROOF_MaxSlavesPerNode).
9901 
9902 void TProof::DeleteParameters(const char *wildcard)
9903 {
9904  if (!fPlayer) return;
9905 
9906  if (!wildcard) wildcard = "";
9907  TRegexp re(wildcard, kTRUE);
9908  Int_t nch = strlen(wildcard);
9909 
9910  TList *il = fPlayer->GetInputList();
9911  if (il) {
9912  TObject *p = 0;
9913  TIter next(il);
9914  while ((p = next())) {
9915  TString s = p->GetName();
9916  if (nch && s != wildcard && s.Index(re) == kNPOS) continue;
9917  il->Remove(p);
9918  delete p;
9919  }
9920  }
9921 }
9922 
9923 ////////////////////////////////////////////////////////////////////////////////
9924 /// Show the input list parameters specified by the wildcard.
9925 /// Default is the special PROOF control parameters (PROOF_*).
9926 
9927 void TProof::ShowParameters(const char *wildcard) const
9928 {
9929  if (!fPlayer) return;
9930 
9931  if (!wildcard) wildcard = "";
9932  TRegexp re(wildcard, kTRUE);
9933  Int_t nch = strlen(wildcard);
9934 
9935  TList *il = fPlayer->GetInputList();
9936  TObject *p;
9937  TIter next(il);
9938  while ((p = next())) {
9939  TString s = p->GetName();
9940  if (nch && s != wildcard && s.Index(re) == kNPOS) continue;
9941  if (p->IsA() == TNamed::Class()) {
9942  Printf("%s\t\t\t%s", s.Data(), p->GetTitle());
9943  } else if (p->IsA() == TParameter<Long_t>::Class()) {
9944  Printf("%s\t\t\t%ld", s.Data(), dynamic_cast<TParameter<Long_t>*>(p)->GetVal());
9945  } else if (p->IsA() == TParameter<Long64_t>::Class()) {
9946  Printf("%s\t\t\t%lld", s.Data(), dynamic_cast<TParameter<Long64_t>*>(p)->GetVal());
9947  } else if (p->IsA() == TParameter<Double_t>::Class()) {
9948  Printf("%s\t\t\t%f", s.Data(), dynamic_cast<TParameter<Double_t>*>(p)->GetVal());
9949  } else {
9950  Printf("%s\t\t\t%s", s.Data(), p->GetTitle());
9951  }
9952  }
9953 }
9954 
9955 ////////////////////////////////////////////////////////////////////////////////
9956 /// Add object to feedback list.
9957 
9958 void TProof::AddFeedback(const char *name)
9959 {
9960  PDB(kFeedback, 3)
9961  Info("AddFeedback", "Adding object \"%s\" to feedback", name);
9962  if (fFeedback->FindObject(name) == 0)
9963  fFeedback->Add(new TObjString(name));
9964 }
9965 
9966 ////////////////////////////////////////////////////////////////////////////////
9967 /// Remove object from feedback list.
9968 
9969 void TProof::RemoveFeedback(const char *name)
9970 {
9971  TObject *obj = fFeedback->FindObject(name);
9972  if (obj != 0) {
9973  fFeedback->Remove(obj);
9974  delete obj;
9975  }
9976 }
9977 
9978 ////////////////////////////////////////////////////////////////////////////////
9979 /// Clear feedback list.
9980 
9981 void TProof::ClearFeedback()
9982 {
9983  fFeedback->Delete();
9984 }
9985 
9986 ////////////////////////////////////////////////////////////////////////////////
9987 /// Show items in feedback list.
9988 
9989 void TProof::ShowFeedback() const
9990 {
9991  if (fFeedback->GetSize() == 0) {
9992  Info("","no feedback requested");
9993  return;
9994  }
9995 
9996  fFeedback->Print();
9997 }
9998 
9999 ////////////////////////////////////////////////////////////////////////////////
10000 /// Return feedback list.
10001 
10002 TList *TProof::GetFeedbackList() const
10003 {
10004  return fFeedback;
10005 }
10006 
10007 ////////////////////////////////////////////////////////////////////////////////
10008 /// Creates a tree header (a tree with nonexisting files) object for
10009 /// the DataSet.
10010 
10011 TTree *TProof::GetTreeHeader(TDSet *dset)
10012 {
10013  TList *l = GetListOfActiveSlaves();
10014  TSlave *sl = (TSlave*) l->First();
10015  if (sl == 0) {
10016  Error("GetTreeHeader", "No connection");
10017  return 0;
10018  }
10019 
10020  TSocket *soc = sl->GetSocket();
10021  TMessage msg(kPROOF_GETTREEHEADER);
10022 
10023  msg << dset;
10024 
10025  soc->Send(msg);
10026 
10027  TMessage *reply;
10028  Int_t d = -1;
10029  if (fProtocol >= 20) {
10030  Collect(sl, fCollectTimeout, kPROOF_GETTREEHEADER);
10031  reply = (TMessage *) fRecvMessages->First();
10032  } else {
10033  d = soc->Recv(reply);
10034  }
10035  if (!reply) {
10036  Error("GetTreeHeader", "Error getting a replay from the master.Result %d", (int) d);
10037  return 0;
10038  }
10039 
10040  TString s1;
10041  TTree *t = 0;
10042  (*reply) >> s1;
10043  if (s1 == "Success")
10044  (*reply) >> t;
10045 
10046  PDB(kGlobal, 1) {
10047  if (t) {
10048  Info("GetTreeHeader", "%s, message size: %d, entries: %d",
10049  s1.Data(), reply->BufferSize(), (int) t->GetMaxEntryLoop());
10050  } else {
10051  Info("GetTreeHeader", "tree header retrieval failed");
10052  }
10053  }
10054  delete reply;
10055 
10056  return t;
10057 }
10058 
10059 ////////////////////////////////////////////////////////////////////////////////
10060 /// Draw feedback creation proxy. When accessed via TProof avoids
10061 /// link dependency on libProofPlayer.
10062 
10063 TDrawFeedback *TProof::CreateDrawFeedback()
10064 {
10065  return (fPlayer ? fPlayer->CreateDrawFeedback(this) : (TDrawFeedback *)0);
10066 }
10067 
10068 ////////////////////////////////////////////////////////////////////////////////
10069 /// Set draw feedback option.
10070 
10071 void TProof::SetDrawFeedbackOption(TDrawFeedback *f, Option_t *opt)
10072 {
10073  if (fPlayer) fPlayer->SetDrawFeedbackOption(f, opt);
10074 }
10075 
10076 ////////////////////////////////////////////////////////////////////////////////
10077 /// Delete draw feedback object.
10078 
10079 void TProof::DeleteDrawFeedback(TDrawFeedback *f)
10080 {
10081  if (fPlayer) fPlayer->DeleteDrawFeedback(f);
10082 }
10083 
10084 ////////////////////////////////////////////////////////////////////////////////
10085 /// FIXME: to be written
10086 
10087 TList *TProof::GetOutputNames()
10088 {
10089  return 0;
10090 /*
10091  TMessage msg(kPROOF_GETOUTPUTLIST);
10092  TList* slaves = fActiveSlaves;
10093  Broadcast(msg, slaves);
10094  TMonitor mon;
10095  TList* outputList = new TList();
10096 
10097  TIter si(slaves);
10098  TSlave *slave;
10099  while ((slave = (TSlave*)si.Next()) != 0) {
10100  PDB(kGlobal,4) Info("GetOutputNames","Socket added to monitor: %p (%s)",
10101  slave->GetSocket(), slave->GetName());
10102  mon.Add(slave->GetSocket());
10103  }
10104  mon.ActivateAll();
10105  ((TProof*)gProof)->DeActivateAsyncInput();
10106  ((TProof*)gProof)->fCurrentMonitor = &mon;
10107 
10108  while (mon.GetActive() != 0) {
10109  TSocket *sock = mon.Select();
10110  if (!sock) {
10111  Error("GetOutputList","TMonitor::.Select failed!");
10112  break;
10113  }
10114  mon.DeActivate(sock);
10115  TMessage *reply;
10116  if (sock->Recv(reply) <= 0) {
10117  MarkBad(slave, "receive failed after kPROOF_GETOUTPUTLIST request");
10118 // Error("GetOutputList","Recv failed! for slave-%d (%s)",
10119 // slave->GetOrdinal(), slave->GetName());
10120  continue;
10121  }
10122  if (reply->What() != kPROOF_GETOUTPUTNAMES ) {
10123 // Error("GetOutputList","unexpected message %d from slawe-%d (%s)", reply->What(),
10124 // slave->GetOrdinal(), slave->GetName());
10125  MarkBad(slave, "wrong reply to kPROOF_GETOUTPUTLIST request");
10126  continue;
10127  }
10128  TList* l;
10129 
10130  (*reply) >> l;
10131  TIter next(l);
10132  TNamed *n;
10133  while ( (n = dynamic_cast<TNamed*> (next())) ) {
10134  if (!outputList->FindObject(n->GetName()))
10135  outputList->Add(n);
10136  }
10137  delete reply;
10138  }
10139  ((TProof*)gProof)->fCurrentMonitor = 0;
10140 
10141  return outputList;
10142 */
10143 }
10144 
10145 ////////////////////////////////////////////////////////////////////////////////
10146 /// Build the PROOF's structure in the browser.
10147 
10148 void TProof::Browse(TBrowser *b)
10149 {
10150  b->Add(fActiveSlaves, fActiveSlaves->Class(), "fActiveSlaves");
10151  b->Add(&fMaster, fMaster.Class(), "fMaster");
10152  b->Add(fFeedback, fFeedback->Class(), "fFeedback");
10153  b->Add(fChains, fChains->Class(), "fChains");
10154 
10155  if (fPlayer) {
10156  b->Add(fPlayer->GetInputList(), fPlayer->GetInputList()->Class(), "InputList");
10157  if (fPlayer->GetOutputList())
10158  b->Add(fPlayer->GetOutputList(), fPlayer->GetOutputList()->Class(), "OutputList");
10159  if (fPlayer->GetListOfResults())
10160  b->Add(fPlayer->GetListOfResults(),
10161  fPlayer->GetListOfResults()->Class(), "ListOfResults");
10162  }
10163 }
10164 
10165 ////////////////////////////////////////////////////////////////////////////////
10166 /// Set a new PROOF player.
10167 
10168 void TProof::SetPlayer(TVirtualProofPlayer *player)
10169 {
10170  if (fPlayer)
10171  delete fPlayer;
10172  fPlayer = player;
10173 };
10174 
10175 ////////////////////////////////////////////////////////////////////////////////
10176 /// Construct a TProofPlayer object. The player string specifies which
10177 /// player should be created: remote, slave, sm (supermaster) or base.
10178 /// Default is remote. Socket is needed in case a slave player is created.
10179 
10180 TVirtualProofPlayer *TProof::MakePlayer(const char *player, TSocket *s)
10181 {
10182  if (!player)
10183  player = "remote";
10184 
10185  SetPlayer(TVirtualProofPlayer::Create(player, this, s));
10186  return GetPlayer();
10187 }
10188 
10189 ////////////////////////////////////////////////////////////////////////////////
10190 /// Add chain to data set
10191 
10192 void TProof::AddChain(TChain *chain)
10193 {
10194  fChains->Add(chain);
10195 }
10196 
10197 ////////////////////////////////////////////////////////////////////////////////
10198 /// Remove chain from data set
10199 
10200 void TProof::RemoveChain(TChain *chain)
10201 {
10202  fChains->Remove(chain);
10203 }
10204 
10205 ////////////////////////////////////////////////////////////////////////////////
10206 /// Ask for remote logs in the range [start, end]. If start == -1 all the
10207 /// messages not yet received are sent back.
10208 
10209 void TProof::GetLog(Int_t start, Int_t end)
10210 {
10211  if (!IsValid() || TestBit(TProof::kIsMaster)) return;
10212 
10213  TMessage msg(kPROOF_LOGFILE);
10214 
10215  msg << start << end;
10216 
10217  Broadcast(msg, kActive);
10218  Collect(kActive, fCollectTimeout);
10219 }
10220 
10221 ////////////////////////////////////////////////////////////////////////////////
10222 /// Fill a TMacro with the log lines since the last reading (fLogFileR)
10223 /// Return (TMacro *)0 if no line was logged.
10224 /// The returned TMacro must be deleted by the caller.
10225 
10226 TMacro *TProof::GetLastLog()
10227 {
10228  TMacro *maclog = 0;
10229 
10230  // Save present offset
10231  off_t nowlog = lseek(fileno(fLogFileR), (off_t) 0, SEEK_CUR);
10232  if (nowlog < 0) {
10233  SysError("GetLastLog",
10234  "problem lseeking log file to current position (errno: %d)", TSystem::GetErrno());
10235  return maclog;
10236  }
10237 
10238  // Get extremes
10239  off_t startlog = nowlog;
10240  off_t endlog = lseek(fileno(fLogFileR), (off_t) 0, SEEK_END);
10241  if (endlog < 0) {
10242  SysError("GetLastLog",
10243  "problem lseeking log file to end position (errno: %d)", TSystem::GetErrno());
10244  return maclog;
10245  }
10246 
10247  // Perhaps nothing to log
10248  UInt_t tolog = (UInt_t)(endlog - startlog);
10249  if (tolog <= 0) return maclog;
10250 
10251  // Set starting point
10252  if (lseek(fileno(fLogFileR), startlog, SEEK_SET) < 0) {
10253  SysError("GetLastLog",
10254  "problem lseeking log file to start position (errno: %d)", TSystem::GetErrno());
10255  return maclog;
10256  }
10257 
10258  // Create the output object
10259  maclog = new TMacro;
10260 
10261  // Now we go
10262  char line[2048];
10263  Int_t wanted = (tolog > sizeof(line)) ? sizeof(line) : tolog;
10264  while (fgets(line, wanted, fLogFileR)) {
10265  Int_t r = strlen(line);
10266  if (r > 0) {
10267  if (line[r-1] == '\n') line[r-1] = '\0';
10268  maclog->AddLine(line);
10269  } else {
10270  // Done
10271  break;
10272  }
10273  tolog -= r;
10274  wanted = (tolog > sizeof(line)) ? sizeof(line) : tolog;
10275  }
10276 
10277  // Restore original pointer
10278  if (lseek(fileno(fLogFileR), nowlog, SEEK_SET) < 0) {
10279  Warning("GetLastLog",
10280  "problem lseeking log file to original position (errno: %d)", TSystem::GetErrno());
10281  }
10282 
10283  // Done
10284  return maclog;
10285 }
10286 
10287 ////////////////////////////////////////////////////////////////////////////////
10288 /// Display log of query pq into the log window frame
10289 
10290 void TProof::PutLog(TQueryResult *pq)
10291 {
10292  if (!pq) return;
10293 
10294  TList *lines = pq->GetLogFile()->GetListOfLines();
10295  if (lines) {
10296  TIter nxl(lines);
10297  TObjString *l = 0;
10298  while ((l = (TObjString *)nxl()))
10299  EmitVA("LogMessage(const char*,Bool_t)", 2, l->GetName(), kFALSE);
10300  }
10301 }
10302 
10303 ////////////////////////////////////////////////////////////////////////////////
10304 /// Display on screen the content of the temporary log file for query
10305 /// in reference
10306 
10307 void TProof::ShowLog(const char *queryref)
10308 {
10309  // Make sure we have all info (GetListOfQueries retrieves the
10310  // head info only)
10311  Retrieve(queryref);
10312 
10313  if (fPlayer) {
10314  if (queryref) {
10315  if (fPlayer->GetListOfResults()) {
10316  TIter nxq(fPlayer->GetListOfResults());
10317  TQueryResult *qr = 0;
10318  while ((qr = (TQueryResult *) nxq()))
10319  if (strstr(queryref, qr->GetTitle()) &&
10320  strstr(queryref, qr->GetName()))
10321  break;
10322  if (qr) {
10323  PutLog(qr);
10324  return;
10325  }
10326 
10327  }
10328  }
10329  }
10330 }
10331 
10332 ////////////////////////////////////////////////////////////////////////////////
10333 /// Display on screen the content of the temporary log file.
10334 /// If qry == -2 show messages from the last (current) query.
10335 /// If qry == -1 all the messages not yet displayed are shown (default).
10336 /// If qry == 0, all the messages in the file are shown.
10337 /// If qry > 0, only the messages related to query 'qry' are shown.
10338 /// For qry != -1 the original file offset is restored at the end
10339 
10340 void TProof::ShowLog(Int_t qry)
10341 {
10342  // Save present offset
10343  off_t nowlog = lseek(fileno(fLogFileR), (off_t) 0, SEEK_CUR);
10344  if (nowlog < 0) {
10345  SysError("ShowLog", "problem lseeking log file (errno: %d)", TSystem::GetErrno());
10346  return;
10347  }
10348 
10349  // Get extremes
10350  off_t startlog = nowlog;
10351  off_t endlog = lseek(fileno(fLogFileR), (off_t) 0, SEEK_END);
10352  if (endlog < 0) {
10353  SysError("ShowLog", "problem lseeking log file (errno: %d)", TSystem::GetErrno());
10354  return;
10355  }
10356 
10357  lseek(fileno(fLogFileR), nowlog, SEEK_SET);
10358  if (qry == 0) {
10359  startlog = 0;
10360  lseek(fileno(fLogFileR), (off_t) 0, SEEK_SET);
10361  } else if (qry != -1) {
10362 
10363  TQueryResult *pq = 0;
10364  if (qry == -2) {
10365  // Pickup the last one
10366  pq = (GetQueryResults()) ? ((TQueryResult *)(GetQueryResults()->Last())) : 0;
10367  if (!pq) {
10368  GetListOfQueries();
10369  if (fQueries)
10370  pq = (TQueryResult *)(fQueries->Last());
10371  }
10372  } else if (qry > 0) {
10373  TList *queries = GetQueryResults();
10374  if (queries) {
10375  TIter nxq(queries);
10376  while ((pq = (TQueryResult *)nxq()))
10377  if (qry == pq->GetSeqNum())
10378  break;
10379  }
10380  if (!pq) {
10381  queries = GetListOfQueries();
10382  TIter nxq(queries);
10383  while ((pq = (TQueryResult *)nxq()))
10384  if (qry == pq->GetSeqNum())
10385  break;
10386  }
10387  }
10388  if (pq) {
10389  PutLog(pq);
10390  return;
10391  } else {
10392  if (gDebug > 0)
10393  Info("ShowLog","query %d not found in list", qry);
10394  qry = -1;
10395  }
10396  }
10397 
10398  // Number of bytes to log
10399  UInt_t tolog = (UInt_t)(endlog - startlog);
10400 
10401  // Perhaps nothing
10402  if (tolog <= 0) {
10403  // Set starting point
10404  lseek(fileno(fLogFileR), startlog, SEEK_SET);
10405  }
10406 
10407  // Now we go
10408  Int_t np = 0;
10409  char line[2048];
10410  Int_t wanted = (tolog > sizeof(line)) ? sizeof(line) : tolog;
10411  while (fgets(line, wanted, fLogFileR)) {
10412 
10413  Int_t r = strlen(line);
10414  if (!SendingLogToWindow()) {
10415  if (line[r-1] != '\n') line[r-1] = '\n';
10416  if (r > 0) {
10417  char *p = line;
10418  while (r) {
10419  Int_t w = write(fileno(stdout), p, r);
10420  if (w < 0) {
10421  SysError("ShowLog", "error writing to stdout");
10422  break;
10423  }
10424  r -= w;
10425  p += w;
10426  }
10427  }
10428  tolog -= strlen(line);
10429  np++;
10430 
10431  // Ask if more is wanted
10432  if (!(np%10)) {
10433  const char *opt = Getline("More (y/n)? [y]");
10434  if (opt[0] == 'n')
10435  break;
10436  }
10437 
10438  // We may be over
10439  if (tolog <= 0)
10440  break;
10441 
10442  // Update wanted bytes
10443  wanted = (tolog > sizeof(line)) ? sizeof(line) : tolog;
10444  } else {
10445  // Log to window
10446  if (line[r-1] == '\n') line[r-1] = 0;
10447  LogMessage(line, kFALSE);
10448  }
10449  }
10450  if (!SendingLogToWindow()) {
10451  // Avoid screwing up the prompt
10452  if (write(fileno(stdout), "\n", 1) != 1)
10453  SysError("ShowLog", "error writing to stdout");
10454  }
10455 
10456  // Restore original pointer
10457  if (qry > -1)
10458  lseek(fileno(fLogFileR), nowlog, SEEK_SET);
10459 }
10460 
10461 ////////////////////////////////////////////////////////////////////////////////
10462 /// Set session with 'id' the default one. If 'id' is not found in the list,
10463 /// the current session is set as default
10464 
10465 void TProof::cd(Int_t id)
10466 {
10467  if (GetManager()) {
10468  TProofDesc *d = GetManager()->GetProofDesc(id);
10469  if (d) {
10470  if (d->GetProof()) {
10471  gProof = d->GetProof();
10472  return;
10473  }
10474  }
10475 
10476  // Id not found or undefined: set as default this session
10477  gProof = this;
10478  }
10479 
10480  return;
10481 }
10482 
10483 ////////////////////////////////////////////////////////////////////////////////
10484 /// Detach this instance to its proofserv.
10485 /// If opt is 'S' or 's' the remote server is shutdown
10486 
10487 void TProof::Detach(Option_t *opt)
10488 {
10489  // Nothing to do if not in contact with proofserv
10490  if (!IsValid()) return;
10491 
10492  // Get worker and socket instances
10493  TSlave *sl = (TSlave *) fActiveSlaves->First();
10494  TSocket *s = 0;
10495  if (!sl || !(sl->IsValid()) || !(s = sl->GetSocket())) {
10496  Error("Detach","corrupted worker instance: wrk:%p, sock:%p", sl, s);
10497  return;
10498  }
10499 
10500  Bool_t shutdown = (strchr(opt,'s') || strchr(opt,'S')) ? kTRUE : kFALSE;
10501 
10502  // If processing, try to stop processing first
10503  if (shutdown && !IsIdle()) {
10504  // Remove pending requests
10505  Remove("cleanupqueue");
10506  // Do not wait for ever, but al least 20 seconds
10507  Long_t timeout = gEnv->GetValue("Proof.ShutdownTimeout", 60);
10508  timeout = (timeout > 20) ? timeout : 20;
10509  // Send stop signal
10510  StopProcess(kFALSE, (Long_t) (timeout / 2));
10511  // Receive results
10512  Collect(kActive, timeout);
10513  }
10514 
10515  // Avoid spurious messages: deactivate new inputs ...
10516  DeActivateAsyncInput();
10517 
10518  // ... and discard existing ones
10519  sl->FlushSocket();
10520 
10521  // Close session (we always close the connection)
10522  Close(opt);
10523 
10524  // Close the progress dialog, if any
10525  if (fProgressDialogStarted)
10526  CloseProgressDialog();
10527 
10528  // Update info in the table of our manager, if any
10529  if (GetManager() && GetManager()->QuerySessions("L")) {
10530  TIter nxd(GetManager()->QuerySessions("L"));
10531  TProofDesc *d = 0;
10532  while ((d = (TProofDesc *)nxd())) {
10533  if (d->GetProof() == this) {
10534  d->SetProof(0);
10535  GetManager()->QuerySessions("L")->Remove(d);
10536  break;
10537  }
10538  }
10539  }
10540 
10541  // Invalidate this instance
10542  fValid = kFALSE;
10543 
10544  return;
10545 }
10546 
10547 ////////////////////////////////////////////////////////////////////////////////
10548 /// Set an alias for this session. If reconnection is supported, the alias
10549 /// will be communicated to the remote coordinator so that it can be recovered
10550 /// when reconnecting
10551 
10552 void TProof::SetAlias(const char *alias)
10553 {
10554  // Set it locally
10555  TNamed::SetTitle(alias);
10556  if (TestBit(TProof::kIsMaster))
10557  // Set the name at the same value
10558  TNamed::SetName(alias);
10559 
10560  // Nothing to do if not in contact with coordinator
10561  if (!IsValid()) return;
10562 
10563  if (!IsProofd() && TestBit(TProof::kIsClient)) {
10564  TSlave *sl = (TSlave *) fActiveSlaves->First();
10565  if (sl)
10566  sl->SetAlias(alias);
10567  }
10568 
10569  return;
10570 }
10571 
10572 ////////////////////////////////////////////////////////////////////////////////
10573 /// *** This function is deprecated and will disappear in future versions ***
10574 /// *** It is just a wrapper around TFile::Cp.
10575 /// *** Please use TProofMgr::UploadFiles.
10576 ///
10577 /// Upload a set of files and save the list of files by name dataSetName.
10578 /// The 'files' argument is a list of TFileInfo objects describing the files
10579 /// as first url.
10580 /// The mask 'opt' is a combination of EUploadOpt:
10581 /// kAppend (0x1) if set true files will be appended to
10582 /// the dataset existing by given name
10583 /// kOverwriteDataSet (0x2) if dataset with given name exited it
10584 /// would be overwritten
10585 /// kNoOverwriteDataSet (0x4) do not overwirte if the dataset exists
10586 /// kOverwriteAllFiles (0x8) overwrite all files that may exist
10587 /// kOverwriteNoFiles (0x10) overwrite none
10588 /// kAskUser (0x0) ask user before overwriteng dataset/files
10589 /// The default value is kAskUser.
10590 /// The user will be asked to confirm overwriting dataset or files unless
10591 /// specified opt provides the answer!
10592 /// If kOverwriteNoFiles is set, then a pointer to TList must be passed as
10593 /// skippedFiles argument. The function will add to this list TFileInfo
10594 /// objects describing all files that existed on the cluster and were
10595 /// not uploaded.
10596 ///
10597 /// Communication Summary
10598 /// Client Master
10599 /// |------------>DataSetName----------->|
10600 /// |<-------kMESS_OK/kMESS_NOTOK<-------| (Name OK/file exist)
10601 /// (*)|-------> call RegisterDataSet ------->|
10602 /// (*) - optional
10603 
10604 Int_t TProof::UploadDataSet(const char *, TList *, const char *, Int_t, TList *)
10605 {
10606  Printf(" *** WARNING: this function is obsolete: it has been replaced by TProofMgr::UploadFiles ***");
10607 
10608  return -1;
10609 }
10610 
10611 ////////////////////////////////////////////////////////////////////////////////
10612 /// *** This function is deprecated and will disappear in future versions ***
10613 /// *** It is just a wrapper around TFile::Cp.
10614 /// *** Please use TProofMgr::UploadFiles.
10615 ///
10616 /// Upload a set of files and save the list of files by name dataSetName.
10617 /// The mask 'opt' is a combination of EUploadOpt:
10618 /// kAppend (0x1) if set true files will be appended to
10619 /// the dataset existing by given name
10620 /// kOverwriteDataSet (0x2) if dataset with given name exited it
10621 /// would be overwritten
10622 /// kNoOverwriteDataSet (0x4) do not overwirte if the dataset exists
10623 /// kOverwriteAllFiles (0x8) overwrite all files that may exist
10624 /// kOverwriteNoFiles (0x10) overwrite none
10625 /// kAskUser (0x0) ask user before overwriteng dataset/files
10626 /// The default value is kAskUser.
10627 /// The user will be asked to confirm overwriting dataset or files unless
10628 /// specified opt provides the answer!
10629 /// If kOverwriteNoFiles is set, then a pointer to TList must be passed as
10630 /// skippedFiles argument. The function will add to this list TFileInfo
10631 /// objects describing all files that existed on the cluster and were
10632 /// not uploaded.
10633 ///
10634 
10635 Int_t TProof::UploadDataSet(const char *, const char *, const char *, Int_t, TList *)
10636 {
10637  Printf(" *** WARNING: this function is obsolete: it has been replaced by TProofMgr::UploadFiles ***");
10638 
10639  return -1;
10640 }
10641 
10642 ////////////////////////////////////////////////////////////////////////////////
10643 /// *** This function is deprecated and will disappear in future versions ***
10644 /// *** It is just a wrapper around TFile::Cp.
10645 /// *** Please use TProofMgr::UploadFiles.
10646 ///
10647 /// Upload files listed in "file" to PROOF cluster.
10648 /// Where file = name of file containing list of files and
10649 /// dataset = dataset name and opt is a combination of EUploadOpt bits.
10650 /// Each file description (line) can include wildcards.
10651 /// Check TFileInfo compatibility
10652 
10653 Int_t TProof::UploadDataSetFromFile(const char *, const char *, const char *, Int_t, TList *)
10654 {
10655  Printf(" *** WARNING: this function is obsolete: it has been replaced by TProofMgr::UploadFiles ***");
10656 
10657  // Done
10658  return -1;
10659 }
10660 
10661 ////////////////////////////////////////////////////////////////////////////////
10662 /// Register the 'dataSet' on the cluster under the current
10663 /// user, group and the given 'dataSetName'.
10664 /// If a dataset with the same name already exists the action fails unless 'opts'
10665 /// contains 'O', in which case the old dataset is overwritten, or contains 'U',
10666 /// in which case 'newDataSet' is added to the existing dataset (duplications are
10667 /// ignored, if any).
10668 /// If 'opts' contains 'V' the dataset files are also verified (if the dataset manager
10669 /// is configured to allow so). By default the dataset is not verified.
10670 /// If 'opts' contains 'T' the in the dataset object (status bits, meta,...)
10671 /// is trusted, i.e. not reset (if the dataset manager is configured to allow so).
10672 /// If 'opts' contains 'S' validation would be run serially (meaningful only if
10673 /// validation is required).
10674 /// Returns kTRUE on success.
10675 
10676 Bool_t TProof::RegisterDataSet(const char *dataSetName,
10677  TFileCollection *dataSet, const char *optStr)
10678 {
10679  // Check TFileInfo compatibility
10680  if (fProtocol < 17) {
10681  Info("RegisterDataSet",
10682  "functionality not available: the server does not have dataset support");
10683  return kFALSE;
10684  }
10685 
10686  if (!dataSetName || strlen(dataSetName) <= 0) {
10687  Info("RegisterDataSet", "specifying a dataset name is mandatory");
10688  return kFALSE;
10689  }
10690 
10691  Bool_t parallelverify = kFALSE;
10692  TString sopt(optStr);
10693  if (sopt.Contains("V") && fProtocol >= 34 && !sopt.Contains("S")) {
10694  // We do verification in parallel later on; just register for now
10695  parallelverify = kTRUE;
10696  sopt.ReplaceAll("V", "");
10697  }
10698  // This would screw up things remotely, make sure is not there
10699  sopt.ReplaceAll("S", "");
10700 
10701  TMessage mess(kPROOF_DATASETS);
10702  mess << Int_t(kRegisterDataSet);
10703  mess << TString(dataSetName);
10704  mess << sopt;
10705  mess.WriteObject(dataSet);
10706  Broadcast(mess);
10707 
10708  Bool_t result = kTRUE;
10709  Collect();
10710  if (fStatus != 0) {
10711  Error("RegisterDataSet", "dataset was not saved");
10712  result = kFALSE;
10713  return result;
10714  }
10715 
10716  // If old server or not verifying in parallel we are done
10717  if (!parallelverify) return result;
10718 
10719  // If we are here it means that we will verify in parallel
10720  sopt += "V";
10721  if (VerifyDataSet(dataSetName, sopt) < 0){
10722  Error("RegisterDataSet", "problems verifying dataset '%s'", dataSetName);
10723  return kFALSE;
10724  }
10725 
10726  // We are done
10727  return kTRUE;
10728 }
10729 
10730 ////////////////////////////////////////////////////////////////////////////////
10731 /// Set/Change the name of the default tree. The tree name may contain
10732 /// subdir specification in the form "subdir/name".
10733 /// Returns 0 on success, -1 otherwise.
10734 
10735 Int_t TProof::SetDataSetTreeName(const char *dataset, const char *treename)
10736 {
10737  // Check TFileInfo compatibility
10738  if (fProtocol < 23) {
10739  Info("SetDataSetTreeName", "functionality not supported by the server");
10740  return -1;
10741  }
10742 
10743  if (!dataset || strlen(dataset) <= 0) {
10744  Info("SetDataSetTreeName", "specifying a dataset name is mandatory");
10745  return -1;
10746  }
10747 
10748  if (!treename || strlen(treename) <= 0) {
10749  Info("SetDataSetTreeName", "specifying a tree name is mandatory");
10750  return -1;
10751  }
10752 
10753  TUri uri(dataset);
10754  TString fragment(treename);
10755  if (!fragment.BeginsWith("/")) fragment.Insert(0, "/");
10756  uri.SetFragment(fragment);
10757 
10758  TMessage mess(kPROOF_DATASETS);
10759  mess << Int_t(kSetDefaultTreeName);
10760  mess << uri.GetUri();
10761  Broadcast(mess);
10762 
10763  Collect();
10764  if (fStatus != 0) {
10765  Error("SetDataSetTreeName", "some error occured: default tree name not changed");
10766  return -1;
10767  }
10768  return 0;
10769 }
10770 
10771 ////////////////////////////////////////////////////////////////////////////////
10772 /// Lists all datasets that match given uri.
10773 /// The 'optStr' can contain a comma-separated list of servers for which the
10774 /// information is wanted. If ':lite:' (case insensitive) is specified in 'optStr'
10775 /// only the global information in the TFileCollection is retrieved; useful to only
10776 /// get the list of available datasets.
10777 
10778 TMap *TProof::GetDataSets(const char *uri, const char *optStr)
10779 {
10780  if (fProtocol < 15) {
10781  Info("GetDataSets",
10782  "functionality not available: the server does not have dataset support");
10783  return 0;
10784  }
10785  if (fProtocol < 31 && strstr(optStr, ":lite:"))
10786  Warning("GetDataSets", "'lite' option not supported by the server");
10787 
10788  TMessage mess(kPROOF_DATASETS);
10789  mess << Int_t(kGetDataSets);
10790  mess << TString(uri ? uri : "");
10791  mess << TString(optStr ? optStr : "");
10792  Broadcast(mess);
10793  Collect(kActive, fCollectTimeout);
10794 
10795  TMap *dataSetMap = 0;
10796  if (fStatus != 0) {
10797  Error("GetDataSets", "error receiving datasets information");
10798  } else {
10799  // Look in the list
10800  TMessage *retMess = (TMessage *) fRecvMessages->First();
10801  if (retMess && retMess->What() == kMESS_OK) {
10802  if (!(dataSetMap = (TMap *)(retMess->ReadObject(TMap::Class()))))
10803  Error("GetDataSets", "error receiving datasets");
10804  } else
10805  Error("GetDataSets", "message not found or wrong type (%p)", retMess);
10806  }
10807 
10808  return dataSetMap;
10809 }
10810 
10811 ////////////////////////////////////////////////////////////////////////////////
10812 /// Shows datasets in locations that match the uri.
10813 /// By default shows the user's datasets and global ones
10814 
10815 void TProof::ShowDataSets(const char *uri, const char* optStr)
10816 {
10817  if (fProtocol < 15) {
10818  Info("ShowDataSets",
10819  "functionality not available: the server does not have dataset support");
10820  return;
10821  }
10822 
10823  TMessage mess(kPROOF_DATASETS);
10824  mess << Int_t(kShowDataSets);
10825  mess << TString(uri ? uri : "");
10826  mess << TString(optStr ? optStr : "");
10827  Broadcast(mess);
10828 
10829  Collect(kActive, fCollectTimeout);
10830  if (fStatus != 0)
10831  Error("ShowDataSets", "error receiving datasets information");
10832 }
10833 
10834 ////////////////////////////////////////////////////////////////////////////////
10835 /// Returns kTRUE if 'dataset' exists, kFALSE otherwise
10836 
10837 Bool_t TProof::ExistsDataSet(const char *dataset)
10838 {
10839  if (fProtocol < 15) {
10840  Info("ExistsDataSet", "functionality not available: the server has an"
10841  " incompatible version of TFileInfo");
10842  return kFALSE;
10843  }
10844 
10845  if (!dataset || strlen(dataset) <= 0) {
10846  Error("ExistsDataSet", "dataset name missing");
10847  return kFALSE;
10848  }
10849 
10850  TMessage msg(kPROOF_DATASETS);
10851  msg << Int_t(kCheckDataSetName) << TString(dataset);
10852  Broadcast(msg);
10853  Collect(kActive, fCollectTimeout);
10854  if (fStatus == -1) {
10855  // The dataset exists
10856  return kTRUE;
10857  }
10858  // The dataset does not exists
10859  return kFALSE;
10860 }
10861 
10862 ////////////////////////////////////////////////////////////////////////////////
10863 /// Clear the content of the dataset cache, if any (matching 'dataset', if defined).
10864 
10865 void TProof::ClearDataSetCache(const char *dataset)
10866 {
10867  if (fProtocol < 28) {
10868  Info("ClearDataSetCache", "functionality not available on server");
10869  return;
10870  }
10871 
10872  TMessage msg(kPROOF_DATASETS);
10873  msg << Int_t(kCache) << TString(dataset) << TString("clear");
10874  Broadcast(msg);
10875  Collect(kActive, fCollectTimeout);
10876  // Done
10877  return;
10878 }
10879 
10880 ////////////////////////////////////////////////////////////////////////////////
10881 /// Display the content of the dataset cache, if any (matching 'dataset', if defined).
10882 
10883 void TProof::ShowDataSetCache(const char *dataset)
10884 {
10885  if (fProtocol < 28) {
10886  Info("ShowDataSetCache", "functionality not available on server");
10887  return;
10888  }
10889 
10890  TMessage msg(kPROOF_DATASETS);
10891  msg << Int_t(kCache) << TString(dataset) << TString("show");
10892  Broadcast(msg);
10893  Collect(kActive, fCollectTimeout);
10894  // Done
10895  return;
10896 }
10897 
10898 ////////////////////////////////////////////////////////////////////////////////
10899 /// Get a list of TFileInfo objects describing the files of the specified
10900 /// dataset.
10901 /// To get the short version (containing only the global meta information)
10902 /// specify optStr = "S:" or optStr = "short:".
10903 /// To get the sub-dataset of files located on a given server(s) specify
10904 /// the list of servers (comma-separated) in the 'optStr' field.
10905 
10906 TFileCollection *TProof::GetDataSet(const char *uri, const char *optStr)
10907 {
10908  if (fProtocol < 15) {
10909  Info("GetDataSet", "functionality not available: the server has an"
10910  " incompatible version of TFileInfo");
10911  return 0;
10912  }
10913 
10914  if (!uri || strlen(uri) <= 0) {
10915  Info("GetDataSet", "specifying a dataset name is mandatory");
10916  return 0;
10917  }
10918 
10919  TMessage nameMess(kPROOF_DATASETS);
10920  nameMess << Int_t(kGetDataSet);
10921  nameMess << TString(uri);
10922  nameMess << TString(optStr ? optStr: "");
10923  if (Broadcast(nameMess) < 0)
10924  Error("GetDataSet", "sending request failed");
10925 
10926  Collect(kActive, fCollectTimeout);
10927  TFileCollection *fileList = 0;
10928  if (fStatus != 0) {
10929  Error("GetDataSet", "error receiving datasets information");
10930  } else {
10931  // Look in the list
10932  TMessage *retMess = (TMessage *) fRecvMessages->First();
10933  if (retMess && retMess->What() == kMESS_OK) {
10934  if (!(fileList = (TFileCollection*)(retMess->ReadObject(TFileCollection::Class()))))
10935  Error("GetDataSet", "error reading list of files");
10936  } else
10937  Error("GetDataSet", "message not found or wrong type (%p)", retMess);
10938  }
10939 
10940  return fileList;
10941 }
10942 
10943 ////////////////////////////////////////////////////////////////////////////////
10944 /// display meta-info for given dataset usi
10945 
10946 void TProof::ShowDataSet(const char *uri, const char* opt)
10947 {
10948  TFileCollection *fileList = 0;
10949  if ((fileList = GetDataSet(uri))) {
10950  fileList->Print(opt);
10951  delete fileList;
10952  } else
10953  Warning("ShowDataSet","no such dataset: %s", uri);
10954 }
10955 
10956 ////////////////////////////////////////////////////////////////////////////////
10957 /// Remove the specified dataset from the PROOF cluster.
10958 /// Files are not deleted.
10959 
10960 Int_t TProof::RemoveDataSet(const char *uri, const char* optStr)
10961 {
10962  TMessage nameMess(kPROOF_DATASETS);
10963  nameMess << Int_t(kRemoveDataSet);
10964  nameMess << TString(uri?uri:"");
10965  nameMess << TString(optStr?optStr:"");
10966  if (Broadcast(nameMess) < 0)
10967  Error("RemoveDataSet", "sending request failed");
10968  Collect(kActive, fCollectTimeout);
10969 
10970  if (fStatus != 0)
10971  return -1;
10972  else
10973  return 0;
10974 }
10975 
10976 ////////////////////////////////////////////////////////////////////////////////
10977 /// Find datasets, returns in a TList all found datasets.
10978 
10979 TList* TProof::FindDataSets(const char* /*searchString*/, const char* /*optStr*/)
10980 {
10981  Error ("FindDataSets", "not yet implemented");
10982  return (TList *) 0;
10983 }
10984 
10985 ////////////////////////////////////////////////////////////////////////////////
10986 /// Allows users to request staging of a particular dataset. Requests are
10987 /// saved in a special dataset repository and must be honored by the endpoint.
10988 
10989 Bool_t TProof::RequestStagingDataSet(const char *dataset)
10990 {
10991  if (fProtocol < 35) {
10992  Error("RequestStagingDataSet",
10993  "functionality not supported by the server");
10994  return kFALSE;
10995  }
10996 
10997  TMessage mess(kPROOF_DATASETS);
10998  mess << Int_t(kRequestStaging);
10999  mess << TString(dataset);
11000  Broadcast(mess);
11001 
11002  Collect();
11003  if (fStatus != 0) {
11004  Error("RequestStagingDataSet", "staging request was unsuccessful");
11005  return kFALSE;
11006  }
11007 
11008  return kTRUE;
11009 }
11010 
11011 ////////////////////////////////////////////////////////////////////////////////
11012 /// Cancels a dataset staging request. Returns kTRUE on success, kFALSE on
11013 /// failure. Dataset not found equals to a failure.
11014 
11015 Bool_t TProof::CancelStagingDataSet(const char *dataset)
11016 {
11017  if (fProtocol < 36) {
11018  Error("CancelStagingDataSet",
11019  "functionality not supported by the server");
11020  return kFALSE;
11021  }
11022 
11023  TMessage mess(kPROOF_DATASETS);
11024  mess << Int_t(kCancelStaging);
11025  mess << TString(dataset);
11026  Broadcast(mess);
11027 
11028  Collect();
11029  if (fStatus != 0) {
11030  Error("CancelStagingDataSet", "cancel staging request was unsuccessful");
11031  return kFALSE;
11032  }
11033 
11034  return kTRUE;
11035 }
11036 
11037 ////////////////////////////////////////////////////////////////////////////////
11038 /// Obtains a TFileCollection showing the staging status of the specified
11039 /// dataset. A valid dataset manager and dataset staging requests repository
11040 /// must be present on the endpoint.
11041 
11042 TFileCollection *TProof::GetStagingStatusDataSet(const char *dataset)
11043 {
11044  if (fProtocol < 35) {
11045  Error("GetStagingStatusDataSet",
11046  "functionality not supported by the server");
11047  return NULL;
11048  }
11049 
11050  TMessage nameMess(kPROOF_DATASETS);
11051  nameMess << Int_t(kStagingStatus);
11052  nameMess << TString(dataset);
11053  if (Broadcast(nameMess) < 0) {
11054  Error("GetStagingStatusDataSet", "sending request failed");
11055  return NULL;
11056  }
11057 
11058  Collect(kActive, fCollectTimeout);
11059  TFileCollection *fc = NULL;
11060 
11061  if (fStatus < 0) {
11062  Error("GetStagingStatusDataSet", "problem processing the request");
11063  }
11064  else if (fStatus == 0) {
11065  TMessage *retMess = (TMessage *)fRecvMessages->First();
11066  if (retMess && (retMess->What() == kMESS_OK)) {
11067  fc = (TFileCollection *)(
11068  retMess->ReadObject(TFileCollection::Class()) );
11069  if (!fc)
11070  Error("GetStagingStatusDataSet", "error reading list of files");
11071  }
11072  else {
11073  Error("GetStagingStatusDataSet",
11074  "response message not found or wrong type (%p)", retMess);
11075  }
11076  }
11077  //else {}
11078 
11079  return fc;
11080 }
11081 
11082 ////////////////////////////////////////////////////////////////////////////////
11083 /// Like GetStagingStatusDataSet, but displays results immediately.
11084 
11085 void TProof::ShowStagingStatusDataSet(const char *dataset, const char *opt)
11086 {
11087  TFileCollection *fc = GetStagingStatusDataSet(dataset);
11088  if (fc) {
11089  fc->Print(opt);
11090  delete fc;
11091  }
11092 }
11093 
11094 ////////////////////////////////////////////////////////////////////////////////
11095 /// Verify if all files in the specified dataset are available.
11096 /// Print a list and return the number of missing files.
11097 /// Returns -1 in case of error.
11098 
11099 Int_t TProof::VerifyDataSet(const char *uri, const char *optStr)
11100 {
11101  if (fProtocol < 15) {
11102  Info("VerifyDataSet", "functionality not available: the server has an"
11103  " incompatible version of TFileInfo");
11104  return -1;
11105  }
11106 
11107  // Sanity check
11108  if (!uri || (uri && strlen(uri) <= 0)) {
11109  Error("VerifyDataSet", "dataset name is is mandatory");
11110  return -1;
11111  }
11112 
11113  Int_t nmissingfiles = 0;
11114 
11115  TString sopt(optStr);
11116  if (fProtocol < 34 || sopt.Contains("S")) {
11117  sopt.ReplaceAll("S", "");
11118  Info("VerifyDataSet", "Master-only verification");
11119  TMessage nameMess(kPROOF_DATASETS);
11120  nameMess << Int_t(kVerifyDataSet);
11121  nameMess << TString(uri);
11122  nameMess << sopt;
11123  Broadcast(nameMess);
11124 
11125  Collect(kActive, fCollectTimeout);
11126 
11127  if (fStatus < 0) {
11128  Info("VerifyDataSet", "no such dataset %s", uri);
11129  return -1;
11130  } else
11131  nmissingfiles = fStatus;
11132  return nmissingfiles;
11133  }
11134 
11135  // Request for parallel verification: can only be done if we have workers
11136  if (!IsParallel() && !fDynamicStartup) {
11137  Error("VerifyDataSet", "PROOF is in sequential mode (no workers): cannot do parallel verification.");
11138  Error("VerifyDataSet", "Either start PROOF with some workers or force sequential adding 'S' as option.");
11139  return -1;
11140  }
11141 
11142  // Do parallel verification
11143  return VerifyDataSetParallel(uri, optStr);
11144 }
11145 
11146 ////////////////////////////////////////////////////////////////////////////////
11147 /// Internal function for parallel dataset verification used TProof::VerifyDataSet and
11148 /// TProofLite::VerifyDataSet
11149 
11150 Int_t TProof::VerifyDataSetParallel(const char *uri, const char *optStr)
11151 {
11152  Int_t nmissingfiles = 0;
11153 
11154  // Let PROOF master prepare node-files map
11155  SetParameter("PROOF_FilesToProcess", Form("dataset:%s", uri));
11156 
11157  // Use TPacketizerFile
11158  TString oldpack;
11159  if (TProof::GetParameter(GetInputList(), "PROOF_Packetizer", oldpack) != 0) oldpack = "";
11160  SetParameter("PROOF_Packetizer", "TPacketizerFile");
11161 
11162  // Add dataset name
11163  SetParameter("PROOF_VerifyDataSet", uri);
11164  // Add options
11165  SetParameter("PROOF_VerifyDataSetOption", optStr);
11166  SetParameter("PROOF_SavePartialResults", (Int_t)0);
11167  Int_t oldifiip = -1;
11168  if (TProof::GetParameter(GetInputList(), "PROOF_IncludeFileInfoInPacket", oldifiip) != 0) oldifiip = -1;
11169  SetParameter("PROOF_IncludeFileInfoInPacket", (Int_t)1);
11170 
11171  // TO DO : figure out mss and stageoption
11172  const char* mss="";
11173  SetParameter("PROOF_MSS", mss);
11174  const char* stageoption="";
11175  SetParameter("PROOF_StageOption", stageoption);
11176 
11177  // Process verification in parallel
11178  Process("TSelVerifyDataSet", (Long64_t) 1);
11179 
11180  // Restore packetizer
11181  if (!oldpack.IsNull())
11182  SetParameter("PROOF_Packetizer", oldpack);
11183  else
11184  DeleteParameters("PROOF_Packetizer");
11185 
11186  // Delete or restore parameters
11187  DeleteParameters("PROOF_FilesToProcess");
11188  DeleteParameters("PROOF_VerifyDataSet");
11189  DeleteParameters("PROOF_VerifyDataSetOption");
11190  DeleteParameters("PROOF_MSS");
11191  DeleteParameters("PROOF_StageOption");
11192  if (oldifiip > -1) {
11193  SetParameter("PROOF_IncludeFileInfoInPacket", oldifiip);
11194  } else {
11195  DeleteParameters("PROOF_IncludeFileInfoInPacket");
11196  }
11197  DeleteParameters("PROOF_SavePartialResults");
11198 
11199  // Merge outputs
11200  Int_t nopened = 0;
11201  Int_t ntouched = 0;
11202  Bool_t changed_ds = kFALSE;
11203 
11204  TIter nxtout(GetOutputList());
11205  TObject* obj;
11206  TList *lfiindout = new TList;
11207  while ((obj = nxtout())) {
11208  TList *l = dynamic_cast<TList *>(obj);
11209  if (l && TString(l->GetName()).BeginsWith("PROOF_ListFileInfos_")) {
11210  TIter nxt(l);
11211  TFileInfo *fiindout = 0;
11212  while ((fiindout = (TFileInfo*) nxt())) {
11213  lfiindout->Add(fiindout);
11214  }
11215  }
11216  // Add up number of disppeared files
11217  TParameter<Int_t>* pdisappeared = dynamic_cast<TParameter<Int_t>*>(obj);
11218  if ( pdisappeared && TString(pdisappeared->GetName()).BeginsWith("PROOF_NoFilesDisppeared_")) {
11219  nmissingfiles += pdisappeared->GetVal();
11220  }
11221  TParameter<Int_t>* pnopened = dynamic_cast<TParameter<Int_t>*>(obj);
11222  if (pnopened && TString(pnopened->GetName()).BeginsWith("PROOF_NoFilesOpened_")) {
11223  nopened += pnopened->GetVal();
11224  }
11225  TParameter<Int_t>* pntouched = dynamic_cast<TParameter<Int_t>*>(obj);
11226  if (pntouched && TString(pntouched->GetName()).BeginsWith("PROOF_NoFilesTouched_")) {
11227  ntouched += pntouched->GetVal();
11228  }
11229  TParameter<Bool_t>* pchanged_ds = dynamic_cast<TParameter<Bool_t>*>(obj);
11230  if (pchanged_ds && TString(pchanged_ds->GetName()).BeginsWith("PROOF_DataSetChanged_")) {
11231  if (pchanged_ds->GetVal() == kTRUE) changed_ds = kTRUE;
11232  }
11233  }
11234 
11235  Info("VerifyDataSetParallel", "%s: changed? %d (# files opened = %d, # files touched = %d,"
11236  " # missing files = %d)",
11237  uri, changed_ds, nopened, ntouched, nmissingfiles);
11238  // Done
11239  return nmissingfiles;
11240 }
11241 
11242 ////////////////////////////////////////////////////////////////////////////////
11243 /// returns a map of the quotas of all groups
11244 
11245 TMap *TProof::GetDataSetQuota(const char* optStr)
11246 {
11247  if (IsLite()) {
11248  Info("UploadDataSet", "Lite-session: functionality not implemented");
11249  return (TMap *)0;
11250  }
11251 
11252  TMessage mess(kPROOF_DATASETS);
11253  mess << Int_t(kGetQuota);
11254  mess << TString(optStr?optStr:"");
11255  Broadcast(mess);
11256 
11257  Collect(kActive, fCollectTimeout);
11258  TMap *groupQuotaMap = 0;
11259  if (fStatus < 0) {
11260  Info("GetDataSetQuota", "could not receive quota");
11261  } else {
11262  // Look in the list
11263  TMessage *retMess = (TMessage *) fRecvMessages->First();
11264  if (retMess && retMess->What() == kMESS_OK) {
11265  if (!(groupQuotaMap = (TMap*)(retMess->ReadObject(TMap::Class()))))
11266  Error("GetDataSetQuota", "error getting quotas");
11267  } else
11268  Error("GetDataSetQuota", "message not found or wrong type (%p)", retMess);
11269  }
11270 
11271  return groupQuotaMap;
11272 }
11273 
11274 ////////////////////////////////////////////////////////////////////////////////
11275 /// shows the quota and usage of all groups
11276 /// if opt contains "U" shows also distribution of usage on user-level
11277 
11278 void TProof::ShowDataSetQuota(Option_t* opt)
11279 {
11280  if (fProtocol < 15) {
11281  Info("ShowDataSetQuota",
11282  "functionality not available: the server does not have dataset support");
11283  return;
11284  }
11285 
11286  if (IsLite()) {
11287  Info("UploadDataSet", "Lite-session: functionality not implemented");
11288  return;
11289  }
11290 
11291  TMessage mess(kPROOF_DATASETS);
11292  mess << Int_t(kShowQuota);
11293  mess << TString(opt?opt:"");
11294  Broadcast(mess);
11295 
11296  Collect();
11297  if (fStatus != 0)
11298  Error("ShowDataSetQuota", "error receiving quota information");
11299 }
11300 
11301 ////////////////////////////////////////////////////////////////////////////////
11302 /// If in active in a monitor set ready state
11303 
11304 void TProof::InterruptCurrentMonitor()
11305 {
11306  if (fCurrentMonitor)
11307  fCurrentMonitor->Interrupt();
11308 }
11309 
11310 ////////////////////////////////////////////////////////////////////////////////
11311 /// Make sure that the worker identified by the ordinal number 'ord' is
11312 /// in the active list. The request will be forwarded to the master
11313 /// in direct contact with the worker. If needed, this master will move
11314 /// the worker from the inactive to the active list and rebuild the list
11315 /// of unique workers.
11316 /// Use ord = "*" to activate all inactive workers.
11317 /// The string 'ord' can also be a comma-separated list of ordinal numbers the
11318 /// status of which will be modified at once.
11319 /// Return <0 if something went wrong (-2 if at least one worker was not found)
11320 /// or the number of workers with status change (on master; 0 on client).
11321 
11322 Int_t TProof::ActivateWorker(const char *ord, Bool_t save)
11323 {
11324  return ModifyWorkerLists(ord, kTRUE, save);
11325 }
11326 
11327 ////////////////////////////////////////////////////////////////////////////////
11328 /// Remove the worker identified by the ordinal number 'ord' from the
11329 /// the active list. The request will be forwarded to the master
11330 /// in direct contact with the worker. If needed, this master will move
11331 /// the worker from the active to the inactive list and rebuild the list
11332 /// of unique workers.
11333 /// Use ord = "*" to deactivate all active workers.
11334 /// The string 'ord' can also be a comma-separated list of ordinal numbers the
11335 /// status of which will be modified at once.
11336 /// Return <0 if something went wrong (-2 if at least one worker was not found)
11337 /// or the number of workers with status change (on master; 0 on client).
11338 
11339 Int_t TProof::DeactivateWorker(const char *ord, Bool_t save)
11340 {
11341  return ModifyWorkerLists(ord, kFALSE, save);
11342 }
11343 
11344 ////////////////////////////////////////////////////////////////////////////////
11345 /// Modify the worker active/inactive list by making the worker identified by
11346 /// the ordinal number 'ord' active (add == TRUE) or inactive (add == FALSE).
11347 /// The string 'ord' can also be a comma-separated list of ordinal numbers the
11348 /// status of which will be modified at once.
11349 /// If needed, the request will be forwarded to the master in direct contact
11350 /// with the worker. The end-master will move the worker from one list to the
11351 /// other active and rebuild the list of unique active workers.
11352 /// Use ord = "*" to act on all workers: activate all inactive ones (add == TRUE) or deactivate all active ones (add == FALSE).
11353 /// If save is TRUE the current active list is saved before any modification is
11354 /// done; re-running with ord = "restore" restores the saved list
11355 /// Return <0 if something went wrong (-2 if at least one worker was not found)
11356 /// or the number of workers with status change (on master; 0 on client).
11357 
11358 Int_t TProof::ModifyWorkerLists(const char *ord, Bool_t add, Bool_t save)
11359 {
11360  // Make sure the input make sense
11361  if (!ord || strlen(ord) <= 0) {
11362  Info("ModifyWorkerLists",
11363  "an ordinal number - e.g. \"0.4\" or \"*\" for all - is required as input");
11364  return -1;
11365  }
11366  if (gDebug > 0)
11367  Info("ModifyWorkerLists", "ord: '%s' (add: %d, save: %d)", ord, add, save);
11368 
11369  Int_t nwc = 0;
11370  Bool_t restoring = !strcmp(ord, "restore") ? kTRUE : kFALSE;
11371  if (IsEndMaster()) {
11372  if (restoring) {
11373  // We are asked to restore the previous settings
11374  nwc = RestoreActiveList();
11375  } else {
11376  if (save) SaveActiveList();
11377  }
11378  }
11379 
11380  Bool_t allord = strcmp(ord, "*") ? kFALSE : kTRUE;
11381 
11382  // Check if this is for us
11383  if (TestBit(TProof::kIsMaster) && gProofServ) {
11384  if (!allord &&
11385  strncmp(ord, gProofServ->GetOrdinal(), strlen(gProofServ->GetOrdinal())))
11386  return 0;
11387  }
11388 
11389  Bool_t fw = kTRUE; // Whether to forward one step down
11390  Bool_t rs = kFALSE; // Whether to rescan for unique workers
11391 
11392  // Appropriate list pointing
11393  TList *in = (add) ? fInactiveSlaves : fActiveSlaves;
11394  TList *out = (add) ? fActiveSlaves : fInactiveSlaves;
11395 
11396  if (IsEndMaster() && !restoring) {
11397  // Create the hash list of ordinal numbers
11398  THashList *ords = 0;
11399  if (!allord) {
11400  ords = new THashList();
11401  const char *masterord = (gProofServ) ? gProofServ->GetOrdinal() : "0";
11402  TString oo(ord), o;
11403  Int_t from = 0;
11404  while(oo.Tokenize(o, from, ","))
11405  if (o.BeginsWith(masterord)) ords->Add(new TObjString(o));
11406  }
11407  // We do not need to send forward
11408  fw = kFALSE;
11409  // Look for the worker in the initial list
11410  TObject *os = 0;
11411  TSlave *wrk = 0;
11412  if (in->GetSize() > 0) {
11413  TIter nxw(in);
11414  while ((wrk = (TSlave *) nxw())) {
11415  os = 0;
11416  if (allord || (ords && (os = ords->FindObject(wrk->GetOrdinal())))) {
11417  // Add it to the final list
11418  if (!out->FindObject(wrk)) {
11419  out->Add(wrk);
11420  if (add)
11421  fActiveMonitor->Add(wrk->GetSocket());
11422  }
11423  // Remove it from the initial list
11424  in->Remove(wrk);
11425  if (!add) {
11426  fActiveMonitor->Remove(wrk->GetSocket());
11427  wrk->SetStatus(TSlave::kInactive);
11428  } else
11429  wrk->SetStatus(TSlave::kActive);
11430  // Count
11431  nwc++;
11432  // Nothing to forward (ord is unique)
11433  fw = kFALSE;
11434  // Rescan for unique workers (active list modified)
11435  rs = kTRUE;
11436  // We may be done, if not option 'all'
11437  if (!allord && ords) {
11438  if (os) ords->Remove(os);
11439  if (ords->GetSize() == 0) break;
11440  SafeDelete(os);
11441  }
11442  }
11443  }
11444  }
11445  // If some worker not found, notify it if at the end
11446  if (!fw && ords && ords->GetSize() > 0) {
11447  TString oo;
11448  TIter nxo(ords);
11449  while ((os = nxo())) {
11450  TIter nxw(out);
11451  while ((wrk = (TSlave *) nxw()))
11452  if (!strcmp(os->GetName(), wrk->GetOrdinal())) break;
11453  if (!wrk) {
11454  if (!oo.IsNull()) oo += ",";
11455  oo += os->GetName();
11456  }
11457  }
11458  if (!oo.IsNull()) {
11459  Warning("ModifyWorkerLists", "worker(s) '%s' not found!", oo.Data());
11460  nwc = -2;
11461  }
11462  }
11463  // Cleanup hash list
11464  if (ords) {
11465  ords->Delete();
11466  SafeDelete(ords);
11467  }
11468  }
11469 
11470  // Rescan for unique workers
11471  if (rs)
11472  FindUniqueSlaves();
11473 
11474  // Forward the request one step down, if needed
11475  Int_t action = (add) ? (Int_t) kActivateWorker : (Int_t) kDeactivateWorker;
11476  if (fw) {
11477  if (fProtocol > 32) {
11478  TMessage mess(kPROOF_WORKERLISTS);
11479  mess << action << TString(ord);
11480  Broadcast(mess);
11481  Collect(kActive, fCollectTimeout);
11482  if (fStatus != 0) {
11483  nwc = (fStatus < nwc) ? fStatus : nwc;
11484  if (fStatus == -2) {
11485  if (gDebug > 0)
11486  Warning("ModifyWorkerLists", "request not completely full filled");
11487  } else {
11488  Error("ModifyWorkerLists", "request failed");
11489  }
11490  }
11491  } else {
11492  TString oo(ord), o;
11493  if (oo.Contains(","))
11494  Warning("ModifyWorkerLists", "block request not supported by server: splitting into pieces ...");
11495  Int_t from = 0;
11496  while(oo.Tokenize(o, from, ",")) {
11497  TMessage mess(kPROOF_WORKERLISTS);
11498  mess << action << o;
11499  Broadcast(mess);
11500  Collect(kActive, fCollectTimeout);
11501  }
11502  }
11503  }
11504  // Done
11505  return nwc;
11506 }
11507 
11508 ////////////////////////////////////////////////////////////////////////////////
11509 /// Save current list of active workers
11510 
11511 void TProof::SaveActiveList()
11512 {
11513  if (!fActiveSlavesSaved.IsNull()) fActiveSlavesSaved = "";
11514  if (fInactiveSlaves->GetSize() == 0) {
11515  fActiveSlavesSaved = "*";
11516  } else {
11517  TIter nxw(fActiveSlaves);
11518  TSlave *wk = 0;
11519  while ((wk = (TSlave *)nxw())) { fActiveSlavesSaved += TString::Format("%s,", wk->GetOrdinal()); }
11520  }
11521 }
11522 
11523 ////////////////////////////////////////////////////////////////////////////////
11524 /// Restore saved list of active workers
11525 
11526 Int_t TProof::RestoreActiveList()
11527 {
11528  // Clear the current active list
11529  DeactivateWorker("*", kFALSE);
11530  // Restore the previous active list
11531  if (!fActiveSlavesSaved.IsNull())
11532  return ActivateWorker(fActiveSlavesSaved, kFALSE);
11533 
11534  return 0;
11535 }
11536 
11537 ////////////////////////////////////////////////////////////////////////////////
11538 /// Start a PROOF session on a specific cluster. If cluster is 0 (the
11539 /// default) then the PROOF Session Viewer GUI pops up and 0 is returned.
11540 /// If cluster is "lite://" we start a PROOF-lite session.
11541 /// If cluster is "" (empty string) then we connect to the cluster specified
11542 /// by 'Proof.LocalDefault', defaulting to "lite://".
11543 /// If cluster is "pod://" (case insensitive), then we connect to a PROOF cluster
11544 /// managed by PROOF on Demand (PoD, http://pod.gsi.de ).
11545 /// Via conffile a specific PROOF config file in the confdir directory can be specified.
11546 /// Use loglevel to set the default logging level for debugging.
11547 /// The appropriate instance of TProofMgr is created, if not
11548 /// yet existing. The instantiated TProof object is returned.
11549 /// Use TProof::cd() to switch between PROOF sessions.
11550 /// For more info on PROOF see the TProof ctor.
11551 
11552 TProof *TProof::Open(const char *cluster, const char *conffile,
11553  const char *confdir, Int_t loglevel)
11554 {
11555  const char *pn = "TProof::Open";
11556 
11557  // Make sure libProof and dependents are loaded and TProof can be created,
11558  // dependents are loaded via the information in the [system].rootmap file
11559  if (!cluster) {
11560 
11561  TPluginManager *pm = gROOT->GetPluginManager();
11562  if (!pm) {
11563  ::Error(pn, "plugin manager not found");
11564  return 0;
11565  }
11566 
11567  if (gROOT->IsBatch()) {
11568  ::Error(pn, "we are in batch mode, cannot show PROOF Session Viewer");
11569  return 0;
11570  }
11571  // start PROOF Session Viewer
11572  TPluginHandler *sv = pm->FindHandler("TSessionViewer", "");
11573  if (!sv) {
11574  ::Error(pn, "no plugin found for TSessionViewer");
11575  return 0;
11576  }
11577  if (sv->LoadPlugin() == -1) {
11578  ::Error(pn, "plugin for TSessionViewer could not be loaded");
11579  return 0;
11580  }
11581  sv->ExecPlugin(0);
11582  return 0;
11583 
11584  } else {
11585 
11586  TString clst(cluster);
11587 
11588  // Check for PoD cluster
11589  if (PoDCheckUrl( &clst ) < 0) return 0;
11590 
11591  if (clst.BeginsWith("workers=")) clst.Insert(0, "lite:///?");
11592  if (clst.BeginsWith("tunnel=")) clst.Insert(0, "/?");
11593 
11594  // Parse input URL
11595  TUrl u(clst);
11596 
11597  // *** GG, 060711: this does not seem to work any more (at XrdClient level)
11598  // *** to be investigated (it is not really needed; static tunnels work).
11599  // Dynamic tunnel:
11600  // Parse any tunning info ("<cluster>/?tunnel=[<tunnel_host>:]tunnel_port)
11601  TString opts(u.GetOptions());
11602  if (!opts.IsNull()) {
11603  Int_t it = opts.Index("tunnel=");
11604  if (it != kNPOS) {
11605  TString sport = opts(it + strlen("tunnel="), opts.Length());
11606  TString host("127.0.0.1");
11607  Int_t port = -1;
11608  Int_t ic = sport.Index(":");
11609  if (ic != kNPOS) {
11610  // Isolate the host
11611  host = sport(0, ic);
11612  sport.Remove(0, ic + 1);
11613  }
11614  if (!sport.IsDigit()) {
11615  // Remove the non digit part
11616  TRegexp re("[^0-9]");
11617  Int_t ind = sport.Index(re);
11618  if (ind != kNPOS)
11619  sport.Remove(ind);
11620  }
11621  // Set the port
11622  if (sport.IsDigit())
11623  port = sport.Atoi();
11624  if (port > 0) {
11625  // Set the relevant variables
11626  ::Info("TProof::Open","using tunnel at %s:%d", host.Data(), port);
11627  gEnv->SetValue("XNet.SOCKS4Host", host);
11628  gEnv->SetValue("XNet.SOCKS4Port", port);
11629  } else {
11630  // Warn parsing problems
11631  ::Warning("TProof::Open",
11632  "problems parsing tunnelling info from options: %s", opts.Data());
11633  }
11634  }
11635  }
11636 
11637  // Find out if we are required to attach to a specific session
11638  Int_t locid = -1;
11639  Bool_t create = kFALSE;
11640  if (opts.Length() > 0) {
11641  if (opts.BeginsWith("N",TString::kIgnoreCase)) {
11642  create = kTRUE;
11643  opts.Remove(0,1);
11644  u.SetOptions(opts);
11645  } else if (opts.IsDigit()) {
11646  locid = opts.Atoi();
11647  }
11648  }
11649 
11650  // Attach-to or create the appropriate manager
11651  TProofMgr *mgr = TProofMgr::Create(u.GetUrl());
11652 
11653  TProof *proof = 0;
11654  if (mgr && mgr->IsValid()) {
11655 
11656  // If XProofd we always attempt an attach first (unless
11657  // explicitly not requested).
11658  Bool_t attach = (create || mgr->IsProofd() || mgr->IsLite()) ? kFALSE : kTRUE;
11659  if (attach) {
11660  TProofDesc *d = 0;
11661  if (locid < 0)
11662  // Get the list of sessions
11663  d = (TProofDesc *) mgr->QuerySessions("")->First();
11664  else
11665  d = (TProofDesc *) mgr->GetProofDesc(locid);
11666  if (d) {
11667  proof = (TProof*) mgr->AttachSession(d);
11668  if (!proof || !proof->IsValid()) {
11669  if (locid)
11670  ::Error(pn, "new session could not be attached");
11671  SafeDelete(proof);
11672  }
11673  }
11674  }
11675 
11676  // start the PROOF session
11677  if (!proof) {
11678  proof = (TProof*) mgr->CreateSession(conffile, confdir, loglevel);
11679  if (!proof || !proof->IsValid()) {
11680  ::Error(pn, "new session could not be created");
11681  SafeDelete(proof);
11682  }
11683  }
11684  }
11685  return proof;
11686  }
11687 }
11688 
11689 ////////////////////////////////////////////////////////////////////////////////
11690 /// Get instance of the effective manager for 'url'
11691 /// Return 0 on failure.
11692 
11693 TProofMgr *TProof::Mgr(const char *url)
11694 {
11695  if (!url)
11696  return (TProofMgr *)0;
11697 
11698  // Attach or create the relevant instance
11699  return TProofMgr::Create(url);
11700 }
11701 
11702 ////////////////////////////////////////////////////////////////////////////////
11703 /// Wrapper around TProofMgr::Reset(...).
11704 
11705 void TProof::Reset(const char *url, Bool_t hard)
11706 {
11707  if (url) {
11708  TProofMgr *mgr = TProof::Mgr(url);
11709  if (mgr && mgr->IsValid())
11710  mgr->Reset(hard);
11711  else
11712  ::Error("TProof::Reset",
11713  "unable to initialize a valid manager instance");
11714  }
11715 }
11716 
11717 ////////////////////////////////////////////////////////////////////////////////
11718 /// Get environment variables.
11719 
11720 const TList *TProof::GetEnvVars()
11721 {
11722  return fgProofEnvList;
11723 }
11724 
11725 ////////////////////////////////////////////////////////////////////////////////
11726 /// Add a variable to the list of environment variables passed to proofserv
11727 /// on the master and slaves
11728 
11729 void TProof::AddEnvVar(const char *name, const char *value)
11730 {
11731  if (gDebug > 0) ::Info("TProof::AddEnvVar","%s=%s", name, value);
11732 
11733  if (fgProofEnvList == 0) {
11734  // initialize the list if needed
11735  fgProofEnvList = new TList;
11736  fgProofEnvList->SetOwner();
11737  } else {
11738  // replace old entries with the same name
11739  TObject *o = fgProofEnvList->FindObject(name);
11740  if (o != 0) {
11741  fgProofEnvList->Remove(o);
11742  }
11743  }
11744  fgProofEnvList->Add(new TNamed(name, value));
11745 }
11746 
11747 ////////////////////////////////////////////////////////////////////////////////
11748 /// Remove a variable from the list of environment variables passed to proofserv
11749 /// on the master and slaves
11750 
11751 void TProof::DelEnvVar(const char *name)
11752 {
11753  if (fgProofEnvList == 0) return;
11754 
11755  TObject *o = fgProofEnvList->FindObject(name);
11756  if (o != 0) {
11757  fgProofEnvList->Remove(o);
11758  }
11759 }
11760 
11761 ////////////////////////////////////////////////////////////////////////////////
11762 /// Clear the list of environment variables passed to proofserv
11763 /// on the master and slaves
11764 
11765 void TProof::ResetEnvVars()
11766 {
11767  if (fgProofEnvList == 0) return;
11768 
11769  SafeDelete(fgProofEnvList);
11770 }
11771 
11772 ////////////////////////////////////////////////////////////////////////////////
11773 /// Save information about the worker set in the file .workers in the working
11774 /// dir. Called each time there is a change in the worker setup, e.g. by
11775 /// TProof::MarkBad().
11776 
11777 void TProof::SaveWorkerInfo()
11778 {
11779  // We must be masters
11780  if (TestBit(TProof::kIsClient))
11781  return;
11782 
11783  // We must have a server defined
11784  if (!gProofServ) {
11785  Error("SaveWorkerInfo","gProofServ undefined");
11786  return;
11787  }
11788 
11789  // The relevant lists must be defined
11790  if (!fSlaves && !fBadSlaves) {
11791  Warning("SaveWorkerInfo","all relevant worker lists is undefined");
11792  return;
11793  }
11794 
11795  // Create or truncate the file first
11796  TString fnwrk = TString::Format("%s/.workers",
11797  gSystem->DirName(gProofServ->GetSessionDir()));
11798  FILE *fwrk = fopen(fnwrk.Data(),"w");
11799  if (!fwrk) {
11800  Error("SaveWorkerInfo",
11801  "cannot open %s for writing (errno: %d)", fnwrk.Data(), errno);
11802  return;
11803  }
11804 
11805  // Do we need to register an additional line for another log?
11806  TString addlogext;
11807  TString addLogTag;
11808  if (gSystem->Getenv("PROOF_ADDITIONALLOG")) {
11809  addlogext = gSystem->Getenv("PROOF_ADDITIONALLOG");
11810  TPMERegexp reLogTag("^__(.*)__\\.log"); // $
11811  if (reLogTag.Match(addlogext) == 2) {
11812  addLogTag = reLogTag[1];
11813  }
11814  else {
11815  addLogTag = "+++";
11816  }
11817  if (gDebug > 0)
11818  Info("SaveWorkerInfo", "request for additional line with ext: '%s'", addlogext.Data());
11819  }
11820 
11821  // Used to eliminate datetime and PID from workdir to obtain log file name
11822  TPMERegexp re("(.*?)-[0-9]+-[0-9]+$");
11823 
11824  // Loop over the list of workers (active is any worker not flagged as bad)
11825  TIter nxa(fSlaves);
11826  TSlave *wrk = 0;
11827  TString logfile;
11828  while ((wrk = (TSlave *) nxa())) {
11829  Int_t status = (fBadSlaves && fBadSlaves->FindObject(wrk)) ? 0 : 1;
11830  logfile = wrk->GetWorkDir();
11831  if (re.Match(logfile) == 2) logfile = re[1];
11832  else continue; // invalid (should not happen)
11833  // Write out record for this worker
11834  fprintf(fwrk,"%s@%s:%d %d %s %s.log\n",
11835  wrk->GetUser(), wrk->GetName(), wrk->GetPort(), status,
11836  wrk->GetOrdinal(), logfile.Data());
11837  // Additional line, if required
11838  if (addlogext.Length() > 0) {
11839  fprintf(fwrk,"%s@%s:%d %d %s(%s) %s.%s\n",
11840  wrk->GetUser(), wrk->GetName(), wrk->GetPort(), status,
11841  wrk->GetOrdinal(), addLogTag.Data(), logfile.Data(), addlogext.Data());
11842  }
11843 
11844  }
11845 
11846  // Loop also over the list of bad workers (if they failed to startup they are not in
11847  // the overall list
11848  TIter nxb(fBadSlaves);
11849  while ((wrk = (TSlave *) nxb())) {
11850  logfile = wrk->GetWorkDir();
11851  if (re.Match(logfile) == 2) logfile = re[1];
11852  else continue; // invalid (should not happen)
11853  if (!fSlaves->FindObject(wrk)) {
11854  // Write out record for this worker
11855  fprintf(fwrk,"%s@%s:%d 0 %s %s.log\n",
11856  wrk->GetUser(), wrk->GetName(), wrk->GetPort(),
11857  wrk->GetOrdinal(), logfile.Data());
11858  }
11859  }
11860 
11861  // Eventually loop over the list of gracefully terminated workers: we'll get
11862  // logfiles from those workers as well. They'll be shown with a special
11863  // status of "2"
11864  TIter nxt(fTerminatedSlaveInfos);
11865  TSlaveInfo *sli;
11866  while (( sli = (TSlaveInfo *)nxt() )) {
11867  logfile = sli->GetDataDir();
11868  if (re.Match(logfile) == 2) logfile = re[1];
11869  else continue; // invalid (should not happen)
11870  fprintf(fwrk, "%s 2 %s %s.log\n",
11871  sli->GetName(), sli->GetOrdinal(), logfile.Data());
11872  // Additional line, if required
11873  if (addlogext.Length() > 0) {
11874  fprintf(fwrk, "%s 2 %s(%s) %s.%s\n",
11875  sli->GetName(), sli->GetOrdinal(), addLogTag.Data(),
11876  logfile.Data(), addlogext.Data());
11877  }
11878  }
11879 
11880  // Close file
11881  fclose(fwrk);
11882 
11883  // We are done
11884  return;
11885 }
11886 
11887 ////////////////////////////////////////////////////////////////////////////////
11888 /// Get the value from the specified parameter from the specified collection.
11889 /// Returns -1 in case of error (i.e. list is 0, parameter does not exist
11890 /// or value type does not match), 0 otherwise.
11891 
11892 Int_t TProof::GetParameter(TCollection *c, const char *par, TString &value)
11893 {
11894  TObject *obj = c ? c->FindObject(par) : (TObject *)0;
11895  if (obj) {
11896  TNamed *p = dynamic_cast<TNamed*>(obj);
11897  if (p) {
11898  value = p->GetTitle();
11899  return 0;
11900  }
11901  }
11902  return -1;
11903 
11904 }
11905 
11906 ////////////////////////////////////////////////////////////////////////////////
11907 /// Get the value from the specified parameter from the specified collection.
11908 /// Returns -1 in case of error (i.e. list is 0, parameter does not exist
11909 /// or value type does not match), 0 otherwise.
11910 
11911 Int_t TProof::GetParameter(TCollection *c, const char *par, Int_t &value)
11912 {
11913  TObject *obj = c ? c->FindObject(par) : (TObject *)0;
11914  if (obj) {
11915  TParameter<Int_t> *p = dynamic_cast<TParameter<Int_t>*>(obj);
11916  if (p) {
11917  value = p->GetVal();
11918  return 0;
11919  }
11920  }
11921  return -1;
11922 }
11923 
11924 ////////////////////////////////////////////////////////////////////////////////
11925 /// Get the value from the specified parameter from the specified collection.
11926 /// Returns -1 in case of error (i.e. list is 0, parameter does not exist
11927 /// or value type does not match), 0 otherwise.
11928 
11929 Int_t TProof::GetParameter(TCollection *c, const char *par, Long_t &value)
11930 {
11931  TObject *obj = c ? c->FindObject(par) : (TObject *)0;
11932  if (obj) {
11933  TParameter<Long_t> *p = dynamic_cast<TParameter<Long_t>*>(obj);
11934  if (p) {
11935  value = p->GetVal();
11936  return 0;
11937  }
11938  }
11939  return -1;
11940 }
11941 
11942 ////////////////////////////////////////////////////////////////////////////////
11943 /// Get the value from the specified parameter from the specified collection.
11944 /// Returns -1 in case of error (i.e. list is 0, parameter does not exist
11945 /// or value type does not match), 0 otherwise.
11946 
11947 Int_t TProof::GetParameter(TCollection *c, const char *par, Long64_t &value)
11948 {
11949  TObject *obj = c ? c->FindObject(par) : (TObject *)0;
11950  if (obj) {
11951  TParameter<Long64_t> *p = dynamic_cast<TParameter<Long64_t>*>(obj);
11952  if (p) {
11953  value = p->GetVal();
11954  return 0;
11955  }
11956  }
11957  return -1;
11958 }
11959 
11960 ////////////////////////////////////////////////////////////////////////////////
11961 /// Get the value from the specified parameter from the specified collection.
11962 /// Returns -1 in case of error (i.e. list is 0, parameter does not exist
11963 /// or value type does not match), 0 otherwise.
11964 
11965 Int_t TProof::GetParameter(TCollection *c, const char *par, Double_t &value)
11966 {
11967  TObject *obj = c ? c->FindObject(par) : (TObject *)0;
11968  if (obj) {
11969  TParameter<Double_t> *p = dynamic_cast<TParameter<Double_t>*>(obj);
11970  if (p) {
11971  value = p->GetVal();
11972  return 0;
11973  }
11974  }
11975  return -1;
11976 }
11977 
11978 ////////////////////////////////////////////////////////////////////////////////
11979 /// Make sure that dataset is in the form to be processed. This may mean
11980 /// retrieving the relevant info from the dataset manager or from the
11981 /// attached input list.
11982 /// Returns 0 on success, -1 on error
11983 
11984 Int_t TProof::AssertDataSet(TDSet *dset, TList *input,
11985  TDataSetManager *mgr, TString &emsg)
11986 {
11987  emsg = "";
11988 
11989  // We must have something to process
11990  if (!dset || !input || !mgr) {
11991  emsg.Form("invalid inputs (%p, %p, %p)", dset, input, mgr);
11992  return -1;
11993  }
11994 
11995  TList *datasets = new TList;
11996  TFileCollection *dataset = 0;
11997  TString lookupopt;
11998  TString dsname(dset->GetName());
11999 
12000  // First extract the "entry list" part on the global name, if any
12001  TString dsns(dsname), enlname;
12002  Ssiz_t eli = dsns.Index("?enl=");
12003  if (eli != kNPOS) {
12004  enlname = dsns(eli + strlen("?enl="), dsns.Length());
12005  dsns.Remove(eli, dsns.Length()-eli);
12006  }
12007 
12008  // The dataset maybe in the form of a TFileCollection in the input list
12009  if (dsname.BeginsWith("TFileCollection:")) {
12010  // Isolate the real name
12011  dsname.ReplaceAll("TFileCollection:", "");
12012  // Get the object
12013  dataset = (TFileCollection *) input->FindObject(dsname);
12014  if (!dataset) {
12015  emsg.Form("TFileCollection %s not found in input list", dset->GetName());
12016  return -1;
12017  }
12018  // Remove from everywhere
12019  input->RecursiveRemove(dataset);
12020  // Add it to the local list
12021  datasets->Add(new TPair(dataset, new TObjString(enlname.Data())));
12022  // Make sure we lookup everything (unless the client or the administrator
12023  // required something else)
12024  if (TProof::GetParameter(input, "PROOF_LookupOpt", lookupopt) != 0) {
12025  lookupopt = gEnv->GetValue("Proof.LookupOpt", "all");
12026  input->Add(new TNamed("PROOF_LookupOpt", lookupopt.Data()));
12027  }
12028  }
12029 
12030  // This is the name we parse for additional specifications, such directory
12031  // and object name; for multiple datasets we assume that the directory and
12032  // and object name are the same for all datasets
12033  TString dsnparse;
12034  // The received message included an empty dataset, with only the name
12035  // defined: assume that a dataset, stored on the PROOF master by that
12036  // name, should be processed.
12037  if (!dataset) {
12038 
12039  TFileCollection *fc = nullptr;
12040 
12041  // Check if the entry list and dataset name are valid. If they have spaces,
12042  // commas, or pipes, they are not considered as valid and we revert to the
12043  // "multiple datasets" case
12044  TRegexp rg("[, |]");
12045  Bool_t validEnl = (enlname.Index(rg) == kNPOS) ? kTRUE : kFALSE;
12046  Bool_t validSdsn = (dsns.Index(rg) == kNPOS) ? kTRUE : kFALSE;
12047 
12048  if (validEnl && validSdsn && (( fc = mgr->GetDataSet(dsns) ))) {
12049 
12050  //
12051  // String corresponds to ONE dataset only
12052  //
12053 
12054  TIter nxfi(fc->GetList());
12055  TFileInfo *fi;
12056  while (( fi = (TFileInfo *)nxfi() ))
12057  fi->SetTitle(dsns.Data());
12058  dataset = fc;
12059  dsnparse = dsns; // without entry list
12060 
12061  // Adds the entry list (or empty string if not specified)
12062  datasets->Add( new TPair(dataset, new TObjString( enlname.Data() )) );
12063 
12064  } else {
12065 
12066  //
12067  // String does NOT correspond to one dataset: check if many datasets
12068  // were specified instead
12069  //
12070 
12071  dsns = dsname.Data();
12072  TString dsn1;
12073  Int_t from1 = 0;
12074  while (dsns.Tokenize(dsn1, from1, "[, ]")) {
12075  TString dsn2;
12076  Int_t from2 = 0;
12077  while (dsn1.Tokenize(dsn2, from2, "|")) {
12078  enlname = "";
12079  Int_t ienl = dsn2.Index("?enl=");
12080  if (ienl != kNPOS) {
12081  enlname = dsn2(ienl + 5, dsn2.Length());
12082  dsn2.Remove(ienl);
12083  }
12084  if ((fc = mgr->GetDataSet(dsn2.Data()))) {
12085  // Save dataset name in TFileInfo's title to use it in TDset
12086  TIter nxfi(fc->GetList());
12087  TFileInfo *fi;
12088  while ((fi = (TFileInfo *) nxfi())) { fi->SetTitle(dsn2.Data()); }
12089  dsnparse = dsn2;
12090  if (!dataset) {
12091  // This is our dataset
12092  dataset = fc;
12093  } else {
12094  // Add it to the dataset
12095  dataset->Add(fc);
12096  SafeDelete(fc);
12097  }
12098  }
12099  }
12100  // The dataset name(s) in the first element
12101  if (dataset) {
12102  if (dataset->GetList()->First())
12103  ((TFileInfo *)(dataset->GetList()->First()))->SetTitle(dsn1.Data());
12104  // Add it to the local list
12105  datasets->Add(new TPair(dataset, new TObjString(enlname.Data())));
12106  }
12107  // Reset the pointer
12108  dataset = 0;
12109  }
12110 
12111  }
12112 
12113  //
12114  // At this point the dataset(s) to be processed, if any, are found in the
12115  // "datasets" variable
12116  //
12117 
12118  if (!datasets || datasets->GetSize() <= 0) {
12119  emsg.Form("no dataset(s) found on the master corresponding to: %s", dsname.Data());
12120  return -1;
12121  } else {
12122  // Make 'dataset' to point to the first one in the list
12123  if (!(dataset = (TFileCollection *) ((TPair *)(datasets->First()))->Key())) {
12124  emsg.Form("dataset pointer is null: corruption? - aborting");
12125  return -1;
12126  }
12127  }
12128  // Apply the lookup option requested by the client or the administartor
12129  // (by default we trust the information in the dataset)
12130  if (TProof::GetParameter(input, "PROOF_LookupOpt", lookupopt) != 0) {
12131  lookupopt = gEnv->GetValue("Proof.LookupOpt", "stagedOnly");
12132  input->Add(new TNamed("PROOF_LookupOpt", lookupopt.Data()));
12133  }
12134  } else {
12135  // We were given a named, single, TFileCollection
12136  dsnparse = dsname;
12137  }
12138 
12139  // Logic for the subdir/obj names: try first to see if the dataset name contains
12140  // some info; if not check the settings in the TDSet object itself; if still empty
12141  // check the default tree name / path in the TFileCollection object; if still empty
12142  // use the default as the flow will determine
12143  TString dsTree;
12144  // Get the [subdir/]tree, if any
12145  mgr->ParseUri(dsnparse.Data(), 0, 0, 0, &dsTree);
12146  if (dsTree.IsNull()) {
12147  // Use what we have in the original dataset; we need this to locate the
12148  // meta data information
12149  dsTree += dset->GetDirectory();
12150  dsTree += dset->GetObjName();
12151  }
12152  if (!dsTree.IsNull() && dsTree != "/") {
12153  TString tree(dsTree);
12154  Int_t idx = tree.Index("/");
12155  if (idx != kNPOS) {
12156  TString dir = tree(0, idx+1);
12157  tree.Remove(0, idx);
12158  dset->SetDirectory(dir);
12159  }
12160  dset->SetObjName(tree);
12161  } else {
12162  // Use the default obj name from the TFileCollection
12163  dsTree = dataset->GetDefaultTreeName();
12164  }
12165 
12166  // Pass dataset server mapping instructions, if any
12167  TList *srvmapsref = TDataSetManager::GetDataSetSrvMaps();
12168  TList *srvmapslist = srvmapsref;
12169  TString srvmaps;
12170  if (TProof::GetParameter(input, "PROOF_DataSetSrvMaps", srvmaps) == 0) {
12171  srvmapslist = TDataSetManager::ParseDataSetSrvMaps(srvmaps);
12172  if (gProofServ) {
12173  TString msg;
12174  if (srvmapsref && !srvmapslist) {
12175  msg.Form("+++ Info: dataset server mapping(s) DISABLED by user");
12176  } else if (srvmapsref && srvmapslist && srvmapslist != srvmapsref) {
12177  msg.Form("+++ Info: dataset server mapping(s) modified by user");
12178  } else if (!srvmapsref && srvmapslist) {
12179  msg.Form("+++ Info: dataset server mapping(s) added by user");
12180  }
12181  gProofServ->SendAsynMessage(msg.Data());
12182  }
12183  }
12184 
12185  // Flag multi-datasets
12186  if (datasets->GetSize() > 1) dset->SetBit(TDSet::kMultiDSet);
12187  // Loop over the list of datasets
12188  TList *listOfMissingFiles = new TList;
12189  TEntryList *entrylist = 0;
12190  TPair *pair = 0;
12191  TIter nxds(datasets);
12192  while ((pair = (TPair *) nxds())) {
12193  // File Collection
12194  dataset = (TFileCollection *) pair->Key();
12195  // Entry list, if any
12196  TEntryList *enl = 0;
12197  TObjString *os = (TObjString *) pair->Value();
12198  if (strlen(os->GetName())) {
12199  if (!(enl = dynamic_cast<TEntryList *>(input->FindObject(os->GetName())))) {
12200  if (gProofServ)
12201  gProofServ->SendAsynMessage(TString::Format("+++ Warning:"
12202  " entry list %s not found", os->GetName()));
12203  }
12204  if (enl && (!(enl->GetLists()) || enl->GetLists()->GetSize() <= 0)) {
12205  if (gProofServ)
12206  gProofServ->SendAsynMessage(TString::Format("+++ Warning:"
12207  " no sub-lists in entry-list!"));
12208  }
12209  }
12210  TList *missingFiles = new TList;
12211  TSeqCollection* files = dataset->GetList();
12212  if (gDebug > 0) files->Print();
12213  Bool_t availableOnly = (lookupopt != "all") ? kTRUE : kFALSE;
12214  if (dset->TestBit(TDSet::kMultiDSet)) {
12215  TDSet *ds = new TDSet(dataset->GetName(), dset->GetObjName(), dset->GetDirectory());
12216  ds->SetSrvMaps(srvmapslist);
12217  if (!ds->Add(files, dsTree, availableOnly, missingFiles)) {
12218  emsg.Form("error integrating dataset %s", dataset->GetName());
12219  continue;
12220  }
12221  // Add the TDSet object to the multi-dataset
12222  dset->Add(ds);
12223  // Add entry list if any
12224  if (enl) ds->SetEntryList(enl);
12225  } else {
12226  dset->SetSrvMaps(srvmapslist);
12227  if (!dset->Add(files, dsTree, availableOnly, missingFiles)) {
12228  emsg.Form("error integrating dataset %s", dataset->GetName());
12229  continue;
12230  }
12231  if (enl) entrylist = enl;
12232  }
12233  if (missingFiles) {
12234  // The missing files objects have to be removed from the dataset
12235  // before delete.
12236  TIter next(missingFiles);
12237  TObject *file;
12238  while ((file = next())) {
12239  dataset->GetList()->Remove(file);
12240  listOfMissingFiles->Add(file);
12241  }
12242  missingFiles->SetOwner(kFALSE);
12243  missingFiles->Clear();
12244  }
12245  SafeDelete(missingFiles);
12246  }
12247  // Cleanup; we need to do this because pairs do no delete their content
12248  nxds.Reset();
12249  while ((pair = (TPair *) nxds())) {
12250  if (pair->Key()) delete pair->Key();
12251  if (pair->Value()) delete pair->Value();
12252  }
12253  datasets->SetOwner(kTRUE);
12254  SafeDelete(datasets);
12255 
12256  // Cleanup the server mapping list, if created by the user
12257  if (srvmapslist && srvmapslist != srvmapsref) {
12258  srvmapslist->SetOwner(kTRUE);
12259  SafeDelete(srvmapslist);
12260  }
12261 
12262  // Set the global entrylist, if required
12263  if (entrylist) dset->SetEntryList(entrylist);
12264 
12265  // Make sure it will be sent back merged with other similar lists created
12266  // during processing; this list will be transferred by the player to the
12267  // output list, once the latter has been created (see TProofPlayerRemote::Process)
12268  if (listOfMissingFiles && listOfMissingFiles->GetSize() > 0) {
12269  listOfMissingFiles->SetName("MissingFiles");
12270  input->Add(listOfMissingFiles);
12271  }
12272 
12273  // Done
12274  return 0;
12275 }
12276 
12277 ////////////////////////////////////////////////////////////////////////////////
12278 /// Save input data file from 'cachedir' into the sandbox or create the file
12279 /// with input data objects
12280 
12281 Int_t TProof::SaveInputData(TQueryResult *qr, const char *cachedir, TString &emsg)
12282 {
12283  TList *input = 0;
12284 
12285  // We must have got something to process
12286  if (!qr || !(input = qr->GetInputList()) ||
12287  !cachedir || strlen(cachedir) <= 0) return 0;
12288 
12289  // There must be some input data or input data file
12290  TNamed *data = (TNamed *) input->FindObject("PROOF_InputDataFile");
12291  TList *inputdata = (TList *) input->FindObject("PROOF_InputData");
12292  if (!data && !inputdata) return 0;
12293  // Default dstination filename
12294  if (!data)
12295  input->Add((data = new TNamed("PROOF_InputDataFile", kPROOF_InputDataFile)));
12296 
12297  TString dstname(data->GetTitle()), srcname;
12298  Bool_t fromcache = kFALSE;
12299  if (dstname.BeginsWith("cache:")) {
12300  fromcache = kTRUE;
12301  dstname.ReplaceAll("cache:", "");
12302  srcname.Form("%s/%s", cachedir, dstname.Data());
12303  if (gSystem->AccessPathName(srcname)) {
12304  emsg.Form("input data file not found in cache (%s)", srcname.Data());
12305  return -1;
12306  }
12307  }
12308 
12309  // If from cache, just move the cache file
12310  if (fromcache) {
12311  if (gSystem->CopyFile(srcname, dstname, kTRUE) != 0) {
12312  emsg.Form("problems copying %s to %s", srcname.Data(), dstname.Data());
12313  return -1;
12314  }
12315  } else {
12316  // Create the file
12317  if (inputdata && inputdata->GetSize() > 0) {
12318  TFile *f = TFile::Open(dstname.Data(), "RECREATE");
12319  if (f) {
12320  f->cd();
12321  inputdata->Write();
12322  f->Close();
12323  delete f;
12324  } else {
12325  emsg.Form("could not create %s", dstname.Data());
12326  return -1;
12327  }
12328  } else {
12329  emsg.Form("no input data!");
12330  return -1;
12331  }
12332  }
12333  ::Info("TProof::SaveInputData", "input data saved to %s", dstname.Data());
12334 
12335  // Save the file name and clean up the data list
12336  data->SetTitle(dstname);
12337  if (inputdata) {
12338  input->Remove(inputdata);
12339  inputdata->SetOwner();
12340  delete inputdata;
12341  }
12342 
12343  // Done
12344  return 0;
12345 }
12346 
12347 ////////////////////////////////////////////////////////////////////////////////
12348 /// Send the input data file to the workers
12349 
12350 Int_t TProof::SendInputData(TQueryResult *qr, TProof *p, TString &emsg)
12351 {
12352  TList *input = 0;
12353 
12354  // We must have got something to process
12355  if (!qr || !(input = qr->GetInputList())) return 0;
12356 
12357  // There must be some input data or input data file
12358  TNamed *inputdata = (TNamed *) input->FindObject("PROOF_InputDataFile");
12359  if (!inputdata) return 0;
12360 
12361  TString fname(inputdata->GetTitle());
12362  if (gSystem->AccessPathName(fname)) {
12363  emsg.Form("input data file not found in sandbox (%s)", fname.Data());
12364  return -1;
12365  }
12366 
12367  // PROOF session must available
12368  if (!p || !p->IsValid()) {
12369  emsg.Form("TProof object undefined or invalid: protocol error!");
12370  return -1;
12371  }
12372 
12373  // Send to unique workers and submasters
12374  p->BroadcastFile(fname, TProof::kBinary, "cache");
12375 
12376  // Done
12377  return 0;
12378 }
12379 
12380 ////////////////////////////////////////////////////////////////////////////////
12381 /// Get the input data from the file defined in the input list
12382 
12383 Int_t TProof::GetInputData(TList *input, const char *cachedir, TString &emsg)
12384 {
12385  // We must have got something to process
12386  if (!input || !cachedir || strlen(cachedir) <= 0) return 0;
12387 
12388  // There must be some input data or input data file
12389  TNamed *inputdata = (TNamed *) input->FindObject("PROOF_InputDataFile");
12390  if (!inputdata) return 0;
12391 
12392  TString fname;
12393  fname.Form("%s/%s", cachedir, inputdata->GetTitle());
12394  if (gSystem->AccessPathName(fname)) {
12395  emsg.Form("input data file not found in cache (%s)", fname.Data());
12396  return -1;
12397  }
12398 
12399  // List of added objects (for proper cleaning ...)
12400  TList *added = new TList;
12401  added->SetName("PROOF_InputObjsFromFile");
12402  // Read the input data into the input list
12403  TFile *f = TFile::Open(fname.Data());
12404  if (f) {
12405  TList *keys = (TList *) f->GetListOfKeys();
12406  if (!keys) {
12407  emsg.Form("could not get list of object keys from file");
12408  return -1;
12409  }
12410  TIter nxk(keys);
12411  TKey *k = 0;
12412  while ((k = (TKey *)nxk())) {
12413  TObject *o = f->Get(k->GetName());
12414  if (o) {
12415  input->Add(o);
12416  added->Add(o);
12417  }
12418  }
12419  // Add the file as last one
12420  if (added->GetSize() > 0) {
12421  added->Add(f);
12422  input->Add(added);
12423  } else {
12424  // Cleanup the file now
12425  f->Close();
12426  delete f;
12427  }
12428  } else {
12429  emsg.Form("could not open %s", fname.Data());
12430  return -1;
12431  }
12432 
12433  // Done
12434  return 0;
12435 }
12436 
12437 ////////////////////////////////////////////////////////////////////////////////
12438 /// Start the log viewer window usign the plugin manager
12439 
12440 void TProof::LogViewer(const char *url, Int_t idx)
12441 {
12442  if (!gROOT->IsBatch()) {
12443  // Get the handler, if not yet done
12444  if (!fgLogViewer) {
12445  if ((fgLogViewer =
12446  gROOT->GetPluginManager()->FindHandler("TProofProgressLog"))) {
12447  if (fgLogViewer->LoadPlugin() == -1) {
12448  fgLogViewer = 0;
12449  ::Error("TProof::LogViewer", "cannot load the relevant plug-in");
12450  return;
12451  }
12452  }
12453  }
12454  if (fgLogViewer) {
12455  // Execute the plug-in
12456  TString u = (url && strlen(url) <= 0) ? "lite" : url;
12457  fgLogViewer->ExecPlugin(2, u.Data(), idx);
12458  }
12459  } else {
12460  if (url && strlen(url) > 0) {
12461  ::Info("TProof::LogViewer",
12462  "batch mode: use TProofLog *pl = TProof::Mgr(\"%s\")->GetSessionLogs(%d)", url, idx);
12463  } else if (url && strlen(url) <= 0) {
12464  ::Info("TProof::LogViewer",
12465  "batch mode: use TProofLog *pl = TProof::Mgr(\"lite\")->GetSessionLogs(%d)", idx);
12466  } else {
12467  ::Info("TProof::LogViewer",
12468  "batch mode: use TProofLog *pl = TProof::Mgr(\"<master>\")->GetSessionLogs(%d)", idx);
12469  }
12470  }
12471  // Done
12472  return;
12473 }
12474 
12475 ////////////////////////////////////////////////////////////////////////////////
12476 /// Enable/Disable the graphic progress dialog.
12477 /// By default the dialog is enabled
12478 
12479 void TProof::SetProgressDialog(Bool_t on)
12480 {
12481  if (on)
12482  SetBit(kUseProgressDialog);
12483  else
12484  ResetBit(kUseProgressDialog);
12485 }
12486 
12487 ////////////////////////////////////////////////////////////////////////////////
12488 /// Show information about missing files during query described by 'qr' or the
12489 /// last query if qr is null (default).
12490 /// A short summary is printed in the end.
12491 
12492 void TProof::ShowMissingFiles(TQueryResult *qr)
12493 {
12494  TQueryResult *xqr = (qr) ? qr : GetQueryResult();
12495  if (!xqr) {
12496  Warning("ShowMissingFiles", "no (last) query found: do nothing");
12497  return;
12498  }
12499 
12500  // Get the list, if any
12501  TList *missing = (xqr->GetOutputList()) ? (TList *) xqr->GetOutputList()->FindObject("MissingFiles") : 0;
12502  if (!missing) {
12503  Info("ShowMissingFiles", "no files missing in query %s:%s", xqr->GetTitle(), xqr->GetName());
12504  return;
12505  }
12506 
12507  Int_t nmf = 0, ncf = 0;
12508  Long64_t msz = 0, mszzip = 0, mev = 0;
12509  // Scan the list
12510  TFileInfo *fi = 0;
12511  TIter nxf(missing);
12512  while ((fi = (TFileInfo *) nxf())) {
12513  char status = 'M';
12514  if (fi->TestBit(TFileInfo::kCorrupted)) {
12515  ncf++;
12516  status = 'C';
12517  } else {
12518  nmf++;
12519  }
12520  TFileInfoMeta *im = fi->GetMetaData();
12521  if (im) {
12522  if (im->GetTotBytes() > 0) msz += im->GetTotBytes();
12523  if (im->GetZipBytes() > 0) mszzip += im->GetZipBytes();
12524  mev += im->GetEntries();
12525  Printf(" %d. (%c) %s %s %lld", ncf+nmf, status, fi->GetCurrentUrl()->GetUrl(), im->GetName(), im->GetEntries());
12526  } else {
12527  Printf(" %d. (%c) %s '' -1", ncf+nmf, status, fi->GetCurrentUrl()->GetUrl());
12528  }
12529  }
12530 
12531  // Final notification
12532  if (msz <= 0) msz = -1;
12533  if (mszzip <= 0) mszzip = -1;
12534  Double_t xf = (Double_t)mev / (mev + xqr->GetEntries()) ;
12535  if (msz > 0. || mszzip > 0.) {
12536  Printf(" +++ %d file(s) missing, %d corrupted, i.e. %lld unprocessed events -->"
12537  " about %.2f%% of the total (%lld bytes, %lld zipped)",
12538  nmf, ncf, mev, xf * 100., msz, mszzip);
12539  } else {
12540  Printf(" +++ %d file(s) missing, %d corrupted, i.e. %lld unprocessed events -->"
12541  " about %.2f%% of the total", nmf, ncf, mev, xf * 100.);
12542  }
12543 }
12544 
12545 ////////////////////////////////////////////////////////////////////////////////
12546 /// Get a TFileCollection with the files missing in the query described by 'qr'
12547 /// or the last query if qr is null (default).
12548 /// Return a null pointer if none were found, for whatever reason.
12549 /// The caller is responsible for the returned object.
12550 
12551 TFileCollection *TProof::GetMissingFiles(TQueryResult *qr)
12552 {
12553  TFileCollection *fc = 0;
12554 
12555  TQueryResult *xqr = (qr) ? qr : GetQueryResult();
12556  if (!xqr) {
12557  Warning("GetMissingFiles", "no (last) query found: do nothing");
12558  return fc;
12559  }
12560 
12561  // Get the list, if any
12562  TList *missing = (xqr->GetOutputList()) ? (TList *) xqr->GetOutputList()->FindObject("MissingFiles") : 0;
12563  if (!missing) {
12564  if (gDebug > 0)
12565  Info("ShowMissingFiles", "no files missing in query %s:%s", xqr->GetTitle(), xqr->GetName());
12566  return fc;
12567  }
12568 
12569  // Create collection: name is <dsname>.m<j>, where 'j' is the first giving a non existing name
12570  TString fcname("unknown");
12571  TDSet *ds = (TDSet *) xqr->GetInputObject("TDSet");
12572  if (ds) {
12573  fcname.Form("%s.m0", ds->GetName());
12574  Int_t j = 1;
12575  while (gDirectory->FindObject(fcname) && j < 1000)
12576  fcname.Form("%s.m%d", ds->GetName(), j++);
12577  }
12578  fc = new TFileCollection(fcname, "Missing Files");
12579  if (ds) fc->SetDefaultTreeName(ds->GetObjName());
12580  // Scan the list
12581  TFileInfo *fi = 0;
12582  TIter nxf(missing);
12583  while ((fi = (TFileInfo *) nxf())) {
12584  fc->Add((TFileInfo *) fi->Clone());
12585  }
12586  fc->Update();
12587  // Done
12588  return fc;
12589 }
12590 
12591 ////////////////////////////////////////////////////////////////////////////////
12592 /// Enable/Disable saving of the performance tree
12593 
12594 void TProof::SetPerfTree(const char *pf, Bool_t withWrks)
12595 {
12596  if (pf && strlen(pf) > 0) {
12597  fPerfTree = pf;
12598  SetParameter("PROOF_StatsHist", "");
12599  SetParameter("PROOF_StatsTrace", "");
12600  if (withWrks) SetParameter("PROOF_SlaveStatsTrace", "");
12601  Info("SetPerfTree", "saving of the performance tree enabled (%s)", fPerfTree.Data());
12602  } else {
12603  fPerfTree = "";
12604  DeleteParameters("PROOF_StatsHist");
12605  DeleteParameters("PROOF_StatsTrace");
12606  DeleteParameters("PROOF_SlaveStatsTrace");
12607  Info("SetPerfTree", "saving of the performance tree disabled");
12608  }
12609 }
12610 
12611 ////////////////////////////////////////////////////////////////////////////////
12612 /// Save performance information from TPerfStats to file 'pf'.
12613 /// If 'ref' is defined, do it for query 'ref'.
12614 /// Return 0 on sucecss, -1 in case of any error
12615 
12616 Int_t TProof::SavePerfTree(const char *pf, const char *ref)
12617 {
12618  if (!IsValid()) {
12619  Error("SafePerfTree", "this TProof instance is invalid!");
12620  return -1;
12621  }
12622 
12623  TList *outls = GetOutputList();
12624  TString sref;
12625  if (ref && strlen(ref) > 0) {
12626  if (!fPlayer) {
12627  Error("SafePerfTree", "requested to use query '%s' but player instance undefined!", ref);
12628  return -1;
12629  }
12630  TQueryResult *qr = fPlayer->GetQueryResult(ref);
12631  if (!qr) {
12632  Error("SafePerfTree", "TQueryResult instance for query '%s' could not be retrieved", ref);
12633  return -1;
12634  }
12635  outls = qr->GetOutputList();
12636  sref.Form(" for requested query '%s'", ref);
12637  }
12638  if (!outls || (outls && outls->GetSize() <= 0)) {
12639  Error("SafePerfTree", "outputlist%s undefined or empty", sref.Data());
12640  return -1;
12641  }
12642 
12643  TString fn = fPerfTree;
12644  if (pf && strlen(pf)) fn = pf;
12645  if (fn.IsNull()) fn = "perftree.root";
12646 
12647  TFile f(fn, "RECREATE");
12648  if (f.IsZombie()) {
12649  Error("SavePerfTree", "could not open file '%s' for writing", fn.Data());
12650  } else {
12651  f.cd();
12652  TIter nxo(outls);
12653  TObject* obj = 0;
12654  while ((obj = nxo())) {
12655  TString objname(obj->GetName());
12656  if (objname.BeginsWith("PROOF_")) {
12657  // Must list the objects since other PROOF_ objects exist
12658  // besides timing objects
12659  if (objname == "PROOF_PerfStats" ||
12660  objname == "PROOF_PacketsHist" ||
12661  objname == "PROOF_EventsHist" ||
12662  objname == "PROOF_NodeHist" ||
12663  objname == "PROOF_LatencyHist" ||
12664  objname == "PROOF_ProcTimeHist" ||
12665  objname == "PROOF_CpuTimeHist")
12666  obj->Write();
12667  }
12668  }
12669  f.Close();
12670  }
12671  Info("SavePerfTree", "performance information%s saved in %s ...", sref.Data(), fn.Data());
12672 
12673  // Done
12674  return 0;
12675 }