Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
TUrl.cxx
Go to the documentation of this file.
1 // @(#)root/base:$Id$
2 // Author: Fons Rademakers 17/01/97
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 /** \class TUrl
13 \ingroup Base
14 
15 This class represents a WWW compatible URL.
16 It provides member functions to return the different parts of
17 a URL. The supported URL format is:
18 ~~~ {.cpp}
19  [proto://][user[:passwd]@]host[:port]/file.ext[?options][#anchor]
20 ~~~
21 */
22 
23 #include <stdlib.h>
24 #include "TUrl.h"
25 #include "THashList.h"
26 #include "TObjArray.h"
27 #include "TObjString.h"
28 #include "TEnv.h"
29 #include "TSystem.h"
30 #include "TMap.h"
31 #include "TROOT.h"
32 
33 TObjArray *TUrl::fgSpecialProtocols = nullptr;
34 THashList *TUrl::fgHostFQDNs = nullptr;
35 
#ifdef R__COMPLETE_MEM_TERMINATION
namespace {
   /// Helper whose destructor empties and frees the two containers reachable
   /// through the pointer addresses handed to the constructor, and then
   /// resets both pointers to null.
   class TUrlCleanup {
   private:
      TObjArray **fSpecialProtocols; // address of the special-protocols array pointer
      THashList **fHostFQDNs;        // address of the host FQDN list pointer

   public:
      TUrlCleanup(TObjArray **protocols, THashList **hosts)
         : fSpecialProtocols(protocols), fHostFQDNs(hosts)
      {
      }

      ~TUrlCleanup()
      {
         // Delete the contained objects first, then the container itself.
         if (TObjArray *protocols = *fSpecialProtocols)
            protocols->Delete();
         delete *fSpecialProtocols;
         *fSpecialProtocols = nullptr;

         if (THashList *hosts = *fHostFQDNs)
            hosts->Delete();
         delete *fHostFQDNs;
         *fHostFQDNs = nullptr;
      }
   };
}
#endif
54 
55 ClassImp(TUrl);
56 
57 ////////////////////////////////////////////////////////////////////////////////
58 /// Parse url character string and split in its different subcomponents.
59 /// Use IsValid() to check if URL is legal.
60 /// ~~~ {.cpp}
61 /// url: [proto://][user[:passwd]@]host[:port]/file.ext[?options][#anchor]
62 /// ~~~
63 /// Known protocols: http, root, proof, ftp, news and any special protocols
64 /// defined in the rootrc Url.Special key.
65 /// The default protocol is "http", unless defaultIsFile is true in which
66 /// case the url is assumed to be of type "file".
67 /// If a passwd contains a @ it must be escaped by a \\, e.g.
68 /// "pip@" becomes "pip\\@".
69 ///
70 /// Default ports: http=80, root=1094, proof=1093, ftp=20, news=119.
71 /// Port #1093 has been assigned by IANA (www.iana.org) to proofd.
72 /// Port #1094 has been assigned by IANA (www.iana.org) to rootd.
73 
74 TUrl::TUrl(const char *url, Bool_t defaultIsFile)
75 {
76  SetUrl(url, defaultIsFile);
77 
78 #ifdef R__COMPLETE_MEM_TERMINATION
79  static TUrlCleanup cleanup(&fgSpecialProtocols,&fgHostFQDNs);
80 #endif
81 }
82 
83 ////////////////////////////////////////////////////////////////////////////////
84 /// Cleanup.
85 
86 TUrl::~TUrl()
87 {
88  delete fOptionsMap;
89 }
90 
91 ////////////////////////////////////////////////////////////////////////////////
92 /// Parse url character string and split in its different subcomponents.
93 /// Use IsValid() to check if URL is legal.
94 ///~~~ {.cpp}
95 /// url: [proto://][user[:passwd]@]host[:port]/file.ext[?options][#anchor]
96 ///~~~
97 /// Known protocols: http, root, proof, ftp, news and any special protocols
98 /// defined in the rootrc Url.Special key.
99 /// The default protocol is "http", unless defaultIsFile is true in which
100 /// case the url is assumed to be of type "file".
101 /// If a passwd contains a @ it must be escaped by a \\, e.g.
102 /// "pip@" becomes "pip\\@".
103 ///
104 /// Default ports: http=80, root=1094, proof=1093, ftp=20, news=119.
105 /// Port #1093 has been assigned by IANA (www.iana.org) to proofd.
106 /// Port #1094 has been assigned by IANA (www.iana.org) to rootd.
107 
108 void TUrl::SetUrl(const char *url, Bool_t defaultIsFile)
109 {
110  fOptionsMap = nullptr;
111 
112  if (!url || !url[0]) {
113  fPort = -1;
114  return;
115  }
116 
117  // Set defaults
118  fUrl = "";
119  fProtocol = "http";
120  fUser = "";
121  fPasswd = "";
122  fHost = "";
123  fPort = 80;
124  fFile = "";
125  fAnchor = "";
126  fOptions = "";
127  fFileOA = "";
128  fHostFQ = "";
129 
130  // if url starts with a / consider it as a file url
131  if (url[0] == '/')
132  defaultIsFile = kTRUE;
133 
134  // Find protocol
135  char *s, sav;
136 
137  char *u, *u0 = Strip(url);
138 tryfile:
139  u = u0;
140 
141  // Handle special protocol cases: "file:", etc.
142  for (int i = 0; i < GetSpecialProtocols()->GetEntriesFast(); i++) {
143  TObjString *os = (TObjString*) GetSpecialProtocols()->UncheckedAt(i);
144  TString s1 = os->GetString();
145  int l = s1.Length();
146  Bool_t stripoff = kFALSE;
147  if (s1.EndsWith("/-")) {
148  stripoff = kTRUE;
149  s1 = s1.Strip(TString::kTrailing, '-');
150  l--;
151  }
152  if (!strncmp(u, s1, l)) {
153  if (s1(0) == '/' && s1(l-1) == '/') {
154  // case with file namespace like: /alien/user/file.root
155  fProtocol = s1(1, l-2);
156  if (stripoff)
157  l--; // strip off namespace prefix from file name
158  else
159  l = 0; // leave namespace prefix as part of file name
160  } else {
161  // case with protocol, like: file:/data/file.root
162  fProtocol = s1(0, l-1);
163  }
164  if (!strncmp(u+l, "//", 2))
165  u += l+2;
166  else
167  u += l;
168  fPort = 0;
169 
170  FindFile(u, kFALSE);
171 
172  delete [] u0;
173  return;
174  }
175  }
176 
177  u = u0;
178 
179  char *x, *t, *s2;
180  // allow x:/path as Windows filename
181  if ((s = strstr(u, ":/")) && u+1 != s) {
182  if (*(s+2) != '/') {
183  Error("TUrl", "%s malformed, URL must contain \"://\"", u0);
184  fPort = -1;
185  goto cleanup;
186  }
187  sav = *s;
188  *s = 0;
189  SetProtocol(u, kTRUE);
190  *s = sav;
191  s += 3;
192  // allow url of form: "proto://"
193  } else {
194  if (defaultIsFile) {
195  char *newu = new char [strlen("file:") + strlen(u0) + 1];
196  sprintf(newu, "file:%s", u0);
197  delete [] u0;
198  u0 = newu;
199  goto tryfile;
200  }
201  s = u;
202  }
203 
204  // Find user and passwd
205  u = s;
206  t = s;
207 again:
208  if ((s = strchr(t, '@')) && (
209  ((x = strchr(t, '/')) && s < x) ||
210  ((x = strchr(t, '?')) && s < x) ||
211  ((x = strchr(t, '#')) && s < x) ||
212  (!strchr(t, '/'))
213  )) {
214  if (*(s-1) == '\\') {
215  t = s+1;
216  goto again;
217  }
218  sav = *s;
219  *s = 0;
220  if ((s2 = strchr(u, ':'))) {
221  *s2 = 0;
222  fUser = u;
223  *s2 = ':';
224  s2++;
225  if (*s2) {
226  fPasswd = s2;
227  fPasswd.ReplaceAll("\\@", "@");
228  }
229  } else
230  fUser = u;
231  *s = sav;
232  s++;
233  } else
234  s = u;
235 
236  // Find host
237  u = s;
238  if ((s = strchr(u, ':')) || (s = strchr(u, '/')) || (s = strchr(u, '?')) || (s = strchr(u, '#'))) {
239  if ((strchr (u, ':') > strchr(u, '/')) && (strchr (u, '/')))
240  s = strchr(u, '/');
241  sav = *s;
242  *s = 0;
243  fHost = u;
244  *s = sav;
245  if (sav == ':') {
246  s++;
247  // Get port #
248  if (!*s) {
249  fPort = -1;
250  goto cleanup;
251  }
252  u = s;
253  if ((s = strchr(u, '/')) || (s = strchr(u, '?')) || (s = strchr(u, '#'))) {
254  sav = *s;
255  *s = 0;
256  fPort = atoi(u);
257  *s = sav;
258  } else {
259  fPort = atoi(u);
260  goto cleanup;
261  }
262  }
263  } else {
264  fHost = u;
265  goto cleanup;
266  }
267 
268  if (!*s) goto cleanup;
269 
270  // Find file
271  u = s;
272  if (*u == '/' && fHost.Length())
273  u++;
274 
275  FindFile(u);
276 
277 cleanup:
278  delete [] u0;
279 }
280 
281 ////////////////////////////////////////////////////////////////////////////////
282 /// Find file and optionally anchor and options.
283 
284 void TUrl::FindFile(char *u, Bool_t stripDoubleSlash)
285 {
286  char *s, sav;
287 
288  // Locate anchor and options, if any
289  char *opt = strchr(u, '?');
290  char *anc = strchr(u, '#');
291 
292  // URL invalid if anchor is coming before the options
293  if (opt && anc && opt > anc) {
294  fPort = -1;
295  return;
296  }
297 
298  if ((s = opt) || (s = anc)) {
299  sav = *s;
300  *s = 0;
301  fFile = u;
302  if (stripDoubleSlash)
303  fFile.ReplaceAll("//", "/");
304  *s = sav;
305  s++;
306  if (sav == '?') {
307  // Get options
308  if (!*s) {
309  // options string is empty
310  return;
311  }
312  u = s;
313  if ((s = strchr(u, '#'))) {
314  sav = *s;
315  *s = 0;
316  fOptions = u;
317  *s = sav;
318  s++;
319  } else {
320  fOptions = u;
321  return;
322  }
323  }
324  if (!*s) {
325  // anchor string is empty
326  return;
327  }
328  } else {
329  fFile = u;
330  if (stripDoubleSlash)
331  fFile.ReplaceAll("//", "/");
332  return;
333  }
334 
335  // Set anchor
336  fAnchor = s;
337 }
338 
339 ////////////////////////////////////////////////////////////////////////////////
340 /// TUrl copy ctor.
341 
342 TUrl::TUrl(const TUrl &url) : TObject(url)
343 {
344  fUrl = url.fUrl;
345  fProtocol = url.fProtocol;
346  fUser = url.fUser;
347  fPasswd = url.fPasswd;
348  fHost = url.fHost;
349  fFile = url.fFile;
350  fAnchor = url.fAnchor;
351  fOptions = url.fOptions;
352  fPort = url.fPort;
353  fFileOA = url.fFileOA;
354  fHostFQ = url.fHostFQ;
355  fOptionsMap = nullptr;
356 }
357 
358 ////////////////////////////////////////////////////////////////////////////////
359 /// TUrl assignment operator.
360 
361 TUrl &TUrl::operator=(const TUrl &rhs)
362 {
363  if (this != &rhs) {
364  TObject::operator=(rhs);
365  fUrl = rhs.fUrl;
366  fProtocol = rhs.fProtocol;
367  fUser = rhs.fUser;
368  fPasswd = rhs.fPasswd;
369  fHost = rhs.fHost;
370  fFile = rhs.fFile;
371  fAnchor = rhs.fAnchor;
372  fOptions = rhs.fOptions;
373  fPort = rhs.fPort;
374  fFileOA = rhs.fFileOA;
375  fHostFQ = rhs.fHostFQ;
376  fOptionsMap = nullptr;
377  }
378  return *this;
379 }
380 
381 ////////////////////////////////////////////////////////////////////////////////
382 /// Return full URL. If withDflt is kTRUE, explicitly add the port even
383 /// if it matches the default value for the URL protocol.
384 
385 const char *TUrl::GetUrl(Bool_t withDeflt) const
386 {
387  if (((TestBit(kUrlWithDefaultPort) && !withDeflt) ||
388  (!TestBit(kUrlWithDefaultPort) && withDeflt)) &&
389  TestBit(kUrlHasDefaultPort))
390  fUrl = "";
391 
392  if (IsValid() && fUrl == "") {
393  // Handle special protocol cases: file:, etc.
394  for (int i = 0; i < GetSpecialProtocols()->GetEntriesFast(); i++) {
395  TObjString *os = (TObjString*) GetSpecialProtocols()->UncheckedAt(i);
396  TString &s = os->String();
397  int l = s.Length();
398  if (fProtocol == s(0, l-1)) {
399  if (fFile[0] == '/')
400  fUrl = fProtocol + "://" + fFile;
401  else
402  fUrl = fProtocol + ":" + fFile;
403  if (fOptions != "") {
404  fUrl += "?";
405  fUrl += fOptions;
406  }
407  if (fAnchor != "") {
408  fUrl += "#";
409  fUrl += fAnchor;
410  }
411  return fUrl;
412  }
413  }
414 
415  Bool_t deflt = kFALSE;
416  if ((!fProtocol.CompareTo("http") && fPort == 80) ||
417  (fProtocol.BeginsWith("proof") && fPort == 1093) ||
418  (fProtocol.BeginsWith("root") && fPort == 1094) ||
419  (!fProtocol.CompareTo("ftp") && fPort == 20) ||
420  (!fProtocol.CompareTo("news") && fPort == 119) ||
421  (!fProtocol.CompareTo("https") && fPort == 443) ||
422  fPort == 0) {
423  deflt = kTRUE;
424  ((TUrl *)this)->SetBit(kUrlHasDefaultPort);
425  }
426 
427  fUrl = fProtocol + "://";
428  if (fUser != "") {
429  fUrl += fUser;
430  if (fPasswd != "") {
431  fUrl += ":";
432  TString passwd = fPasswd;
433  passwd.ReplaceAll("@", "\\@");
434  fUrl += passwd;
435  }
436  fUrl += "@";
437  }
438  if (withDeflt)
439  ((TUrl*)this)->SetBit(kUrlWithDefaultPort);
440  else
441  ((TUrl*)this)->ResetBit(kUrlWithDefaultPort);
442 
443  if (!deflt || withDeflt) {
444  char p[10];
445  sprintf(p, "%d", fPort);
446  fUrl = fUrl + fHost + ":" + p + "/" + fFile;
447  } else
448  fUrl = fUrl + fHost + "/" + fFile;
449  if (fOptions != "") {
450  fUrl += "?";
451  fUrl += fOptions;
452  }
453  if (fAnchor != "") {
454  fUrl += "#";
455  fUrl += fAnchor;
456  }
457  }
458 
459  fUrl.ReplaceAll("////", "///");
460  return fUrl;
461 }
462 
463 ////////////////////////////////////////////////////////////////////////////////
464 /// Return fully qualified domain name of url host. If host cannot be
465 /// resolved or not valid return the host name as originally specified.
466 
467 const char *TUrl::GetHostFQDN() const
468 {
469  if (fHostFQ == "") {
470  // Check if we already resolved it
471  TNamed *fqdn = fgHostFQDNs ? (TNamed *) fgHostFQDNs->FindObject(fHost) : 0;
472  if (!fqdn) {
473  TInetAddress adr(gSystem->GetHostByName(fHost));
474  if (adr.IsValid()) {
475  fHostFQ = adr.GetHostName();
476  } else
477  fHostFQ = "-";
478  R__LOCKGUARD(gROOTMutex);
479  if (!fgHostFQDNs) {
480  fgHostFQDNs = new THashList;
481  fgHostFQDNs->SetOwner();
482  }
483  if (fgHostFQDNs && !fgHostFQDNs->FindObject(fHost))
484  fgHostFQDNs->Add(new TNamed(fHost,fHostFQ));
485  } else {
486  fHostFQ = fqdn->GetTitle();
487  }
488  }
489  if (fHostFQ == "-")
490  return fHost;
491  return fHostFQ;
492 }
493 
494 ////////////////////////////////////////////////////////////////////////////////
495 /// Return the file and its options (the string specified behind the ?).
496 /// Convenience function useful when the option is used to pass
497 /// authentication/access information for the specified file.
498 
499 const char *TUrl::GetFileAndOptions() const
500 {
501  if (fFileOA == "") {
502  fFileOA = fFile;
503  if (fOptions != "") {
504  fFileOA += "?";
505  fFileOA += fOptions;
506  }
507  if (fAnchor != "") {
508  fFileOA += "#";
509  fFileOA += fAnchor;
510  }
511  }
512  return fFileOA;
513 }
514 
515 ////////////////////////////////////////////////////////////////////////////////
516 /// Set protocol and, optionally, change the port accordingly.
517 
518 void TUrl::SetProtocol(const char *proto, Bool_t setDefaultPort)
519 {
520  fProtocol = proto;
521  if (setDefaultPort) {
522  if (!fProtocol.CompareTo("http"))
523  fPort = 80;
524  else if (!fProtocol.CompareTo("https"))
525  fPort = 443;
526  else if (fProtocol.BeginsWith("proof")) // can also be proofs or proofk
527  fPort = 1093;
528  else if (fProtocol.BeginsWith("root")) // can also be roots or rootk
529  fPort = 1094;
530  else if (!fProtocol.CompareTo("ftp"))
531  fPort = 20;
532  else if (!fProtocol.CompareTo("news"))
533  fPort = 119;
534  else {
535  // generic protocol (no default port)
536  fPort = 0;
537  }
538  }
539  fUrl = "";
540 }
541 
542 ////////////////////////////////////////////////////////////////////////////////
543 /// Compare two urls as strings.
544 
545 Int_t TUrl::Compare(const TObject *obj) const
546 {
547  if (this == obj) return 0;
548  if (TUrl::Class() != obj->IsA()) return -1;
549  return TString(GetUrl()).CompareTo(((TUrl*)obj)->GetUrl(), TString::kExact);
550 }
551 
552 ////////////////////////////////////////////////////////////////////////////////
553 /// Print URL on stdout.
554 
555 void TUrl::Print(Option_t *) const
556 {
557  if (fPort == -1)
558  Printf("Illegal URL");
559 
560  Printf("%s", GetUrl());
561 }
562 
563 ////////////////////////////////////////////////////////////////////////////////
564 /// Read the list of special protocols from the rootrc files.
565 /// These protocols will be parsed in a protocol and a file part,
566 /// no host or other info will be determined. This is typically
567 /// used for legacy file descriptions like: file:/path/file.root.
568 
569 TObjArray *TUrl::GetSpecialProtocols()
570 {
571  R__LOCKGUARD(gROOTMutex);
572  static Bool_t usedEnv = kFALSE;
573 
574  if (!gEnv) {
575  if (!fgSpecialProtocols)
576  fgSpecialProtocols = new TObjArray;
577  if (fgSpecialProtocols->GetEntriesFast() == 0)
578  fgSpecialProtocols->Add(new TObjString("file:"));
579  return fgSpecialProtocols;
580  }
581 
582  if (usedEnv)
583  return fgSpecialProtocols;
584 
585  if (fgSpecialProtocols)
586  fgSpecialProtocols->Delete();
587 
588  if (!fgSpecialProtocols)
589  fgSpecialProtocols = new TObjArray;
590 
591  const char *protos = gEnv->GetValue("Url.Special", "file: hpss: dcache: dcap:");
592  usedEnv = kTRUE;
593 
594  if (protos) {
595  Int_t cnt = 0;
596  char *p = StrDup(protos);
597  while (1) {
598  TObjString *proto = new TObjString(strtok(!cnt ? p : 0, " "));
599  if (proto->String().IsNull()) {
600  delete proto;
601  break;
602  }
603  fgSpecialProtocols->Add(proto);
604  cnt++;
605  }
606  delete [] p;
607  }
608  return fgSpecialProtocols;
609 }
610 
611 
612 ////////////////////////////////////////////////////////////////////////////////
613 /// Parse URL options into a key/value map.
614 
615 void TUrl::ParseOptions() const
616 {
617  if (fOptionsMap) return;
618 
619  TString urloptions = GetOptions();
620  TObjArray *objOptions = urloptions.Tokenize("&");
621  for (Int_t n = 0; n < objOptions->GetEntries(); n++) {
622  TString loption = ((TObjString *) objOptions->At(n))->GetName();
623  TObjArray *objTags = loption.Tokenize("=");
624  if (!fOptionsMap) {
625  fOptionsMap = new TMap;
626  fOptionsMap->SetOwnerKeyValue();
627  }
628  if (objTags->GetEntries() == 2) {
629  TString key = ((TObjString *) objTags->At(0))->GetName();
630  TString value = ((TObjString *) objTags->At(1))->GetName();
631  fOptionsMap->Add(new TObjString(key), new TObjString(value));
632  } else {
633  TString key = ((TObjString *) objTags->At(0))->GetName();
634  fOptionsMap->Add(new TObjString(key), 0);
635  }
636  delete objTags;
637  }
638  delete objOptions;
639 }
640 
641 
642 ////////////////////////////////////////////////////////////////////////////////
643 /// Return a value for a given key from the URL options.
644 /// Returns 0 in case key is not found.
645 
646 const char *TUrl::GetValueFromOptions(const char *key) const
647 {
648  if (!key) return nullptr;
649  ParseOptions();
650  TObject *option = fOptionsMap ? fOptionsMap->GetValue(key) : nullptr;
651  return (option ? ((TObjString*)fOptionsMap->GetValue(key))->GetName(): nullptr);
652 }
653 
654 ////////////////////////////////////////////////////////////////////////////////
655 /// Return a value for a given key from the URL options as an Int_t,
656 /// a missing key returns -1.
657 
658 Int_t TUrl::GetIntValueFromOptions(const char *key) const
659 {
660  if (!key) return -1;
661  ParseOptions();
662  TObject *option = fOptionsMap ? fOptionsMap->GetValue(key) : nullptr;
663  return (option ? (atoi(((TObjString*)fOptionsMap->GetValue(key))->GetName())) : -1);
664 }
665 
666 ////////////////////////////////////////////////////////////////////////////////
667 /// Returns true if the given key appears in the URL options list.
668 
669 Bool_t TUrl::HasOption(const char *key) const
670 {
671  if (!key) return kFALSE;
672  ParseOptions();
673 
674  if (fOptionsMap && fOptionsMap->FindObject(key))
675  return kTRUE;
676  return kFALSE;
677 }
678 
679 ////////////////////////////////////////////////////////////////////////////////
680 /// Recompute the path removing all relative directory jumps via '..'.
681 
682 void TUrl::CleanRelativePath()
683 {
684  Ssiz_t slash = 0;
685  while ( (slash = fFile.Index("/..") ) != kNPOS) {
686  // find backwards the next '/'
687  Bool_t found = kFALSE;
688  for (int l = slash-1; l >=0; l--) {
689  if (fFile[l] == '/') {
690  // found previous '/'
691  fFile.Remove(l, slash+3-l);
692  found = kTRUE;
693  break;
694  }
695  }
696  if (!found)
697  break;
698  }
699 }