TWebFile.cxx
1 // @(#)root/net:$Id$
2 // Author: Fons Rademakers 17/01/97
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2000, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////////////////////
13 // //
14 // TWebFile //
15 // //
16 // A TWebFile is like a normal TFile except that it reads its data //
17 // via a standard Apache web server. A TWebFile is a read-only file. //
18 // //
19 //////////////////////////////////////////////////////////////////////////
20 
21 #include "TWebFile.h"
22 #include "TROOT.h"
23 #include "TSocket.h"
24 #include "Bytes.h"
25 #include "TError.h"
26 #include "TSystem.h"
27 #include "TBase64.h"
28 #include "TVirtualPerfStats.h"
29 #ifdef R__SSL
30 #include "TSSLSocket.h"
31 #endif
32 
33 #include <errno.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #ifdef WIN32
38 # ifndef EADDRINUSE
39 # define EADDRINUSE 10048
40 # endif
41 # ifndef EISCONN
42 # define EISCONN 10056
43 # endif
44 #endif
45 
46 static const char *gUserAgent = "User-Agent: ROOT-TWebFile/1.1";
47 
48 TUrl TWebFile::fgProxy;
49 
50 Long64_t TWebFile::fgMaxFullCacheSize = 500000000;
51 
52 
53 // Internal class used to manage the socket that may stay open between
54 // calls when HTTP/1.1 protocol is used
55 class TWebSocket {
56 private:
57  TWebFile *fWebFile; // associated web file
58 public:
59  TWebSocket(TWebFile *f);
60  ~TWebSocket();
61  void ReOpen();
62 };
63 
64 ////////////////////////////////////////////////////////////////////////////////
65 /// Open web file socket.
66 
67 TWebSocket::TWebSocket(TWebFile *f)
68 {
69  fWebFile = f;
70  if (!f->fSocket)
71  ReOpen();
72 }
73 
74 ////////////////////////////////////////////////////////////////////////////////
75 /// Close the socket when the HTTP/1.1 protocol is not used or when explicitly requested.
76 
77 TWebSocket::~TWebSocket()
78 {
79  if (!fWebFile->fHTTP11) {
80  delete fWebFile->fSocket;
81  fWebFile->fSocket = 0;
82  }
83 }
84 
85 ////////////////////////////////////////////////////////////////////////////////
86 /// Re-open web file socket.
87 
88 void TWebSocket::ReOpen()
89 {
90  if (fWebFile->fSocket) {
91  delete fWebFile->fSocket;
92  fWebFile->fSocket = 0;
93  }
94 
95  TUrl connurl;
96  if (fWebFile->fProxy.IsValid())
97  connurl = fWebFile->fProxy;
98  else
99  connurl = fWebFile->fUrl;
100 
101  for (Int_t i = 0; i < 5; i++) {
102  if (strcmp(connurl.GetProtocol(), "https") == 0) {
103 #ifdef R__SSL
104  fWebFile->fSocket = new TSSLSocket(connurl.GetHost(), connurl.GetPort());
105 #else
106  ::Error("TWebSocket::ReOpen", "library compiled without SSL, https not supported");
107  return;
108 #endif
109  } else
110  fWebFile->fSocket = new TSocket(connurl.GetHost(), connurl.GetPort());
111 
112  if (!fWebFile->fSocket || !fWebFile->fSocket->IsValid()) {
113  delete fWebFile->fSocket;
114  fWebFile->fSocket = 0;
115  if (gSystem->GetErrno() == EADDRINUSE || gSystem->GetErrno() == EISCONN) {
116  gSystem->Sleep(i*10);
117  } else {
118  ::Error("TWebSocket::ReOpen", "cannot connect to host %s (errno=%d)",
119  fWebFile->fUrl.GetHost(), gSystem->GetErrno());
120  return;
121  }
122  } else
123  return;
124  }
125 }
126 
127 
128 ClassImp(TWebFile);
129 
130 ////////////////////////////////////////////////////////////////////////////////
131 /// Create a Web file object. A web file is the same as a read-only
132 /// TFile except that it is being read via an HTTP server. The url
133 /// argument must be of the form: http://host.dom.ain/file.root.
134 /// The opt argument can be "NOPROXY", to bypass any "http_proxy" shell
135 /// variable that may be set. The proxy can be specified as (in sh, or equivalent csh):
136 /// export http_proxy=http://pcsalo.cern.ch:3128
137 /// The proxy can also be specified via the static method TWebFile::SetProxy().
138 /// Basic authentication (AuthType Basic) is supported. The user name and
139 /// passwd can be specified in the url like this:
140 /// http://username:mypasswd@pcsalo.cern.ch/files/aap.root
141 /// If the file specified in the URL does not exist or is not accessible
142 /// the kZombie bit will be set in the TWebFile object. Use IsZombie()
143 /// to see if the file is accessible. The preferred interface to this
144 /// constructor is via TFile::Open().
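//
// Example (editorial sketch, not part of the original source; host and file
// name are placeholders): the preferred way to create a TWebFile is via
// TFile::Open(); the "NOPROXY" option can be passed through this constructor.
//
//    TFile *f = TFile::Open("http://host.dom.ain/files/aap.root");
//    if (f && !f->IsZombie())
//       f->ls();    // file is accessible, list its contents
//    delete f;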
145 
146 TWebFile::TWebFile(const char *url, Option_t *opt) : TFile(url, "WEB"), fSocket(0)
147 {
148  TString option = opt;
149  fNoProxy = kFALSE;
150  if (option.Contains("NOPROXY", TString::kIgnoreCase))
151  fNoProxy = kTRUE;
152  CheckProxy();
153 
154  Bool_t headOnly = kFALSE;
155  if (option.Contains("HEADONLY", TString::kIgnoreCase))
156  headOnly = kTRUE;
157 
158  if (option == "IO")
159  return;
160 
161  Init(headOnly);
162 }
163 
164 ////////////////////////////////////////////////////////////////////////////////
165 /// Create a Web file object. A web file is the same as a read-only
166 /// TFile except that it is being read via an HTTP server. Make sure url
167 /// is a valid TUrl object.
168 /// The opt argument can be "NOPROXY", to bypass any "http_proxy" shell
169 /// variable that may be set. The proxy can be specified as (in sh, or equivalent csh):
170 /// export http_proxy=http://pcsalo.cern.ch:3128
171 /// The proxy can also be specified via the static method TWebFile::SetProxy().
172 /// Basic authentication (AuthType Basic) is supported. The user name and
173 /// passwd can be specified in the url like this:
174 /// http://username:mypasswd@pcsalo.cern.ch/files/aap.root
175 /// If the file specified in the URL does not exist or is not accessible
176 /// the kZombie bit will be set in the TWebFile object. Use IsZombie()
177 /// to see if the file is accessible.
178 
179 TWebFile::TWebFile(TUrl url, Option_t *opt) : TFile(url.GetUrl(), "WEB"), fSocket(0)
180 {
181  TString option = opt;
182  fNoProxy = kFALSE;
183  if (option.Contains("NOPROXY", TString::kIgnoreCase))
184  fNoProxy = kTRUE;
185  CheckProxy();
186 
187  Bool_t headOnly = kFALSE;
188  if (option.Contains("HEADONLY", TString::kIgnoreCase))
189  headOnly = kTRUE;
190 
191  Init(headOnly);
192 }
193 
194 ////////////////////////////////////////////////////////////////////////////////
195 /// Cleanup.
196 
197 TWebFile::~TWebFile()
198 {
199  delete fSocket;
200  if (fFullCache) {
201  free(fFullCache);
202  fFullCache = 0;
203  fFullCacheSize = 0;
204  }
205 }
206 
207 ////////////////////////////////////////////////////////////////////////////////
208 /// Initialize a TWebFile object.
209 
210 void TWebFile::Init(Bool_t readHeadOnly)
211 {
212  char buf[4];
213  int err;
214 
215  fSocket = 0;
216  fSize = -1;
217  fHasModRoot = kFALSE;
218  fHTTP11 = kFALSE;
219  fFullCache = 0;
220  fFullCacheSize = 0;
221  SetMsgReadBuffer10();
222 
223  if ((err = GetHead()) < 0) {
224  if (readHeadOnly) {
225  fD = -1;
226  fWritten = err;
227  return;
228  }
229  if (err == -2) {
230  Error("TWebFile", "%s does not exist", fBasicUrl.Data());
231  MakeZombie();
232  gDirectory = gROOT;
233  return;
234  }
235  // err == -3 HEAD not supported, fall through and try ReadBuffer()
236  }
237  if (readHeadOnly) {
238  fD = -1;
239  return;
240  }
241 
242  if (fIsRootFile) {
243  Seek(0);
244  if (ReadBuffer(buf, 4)) {
245  MakeZombie();
246  gDirectory = gROOT;
247  return;
248  }
249 
250  if (strncmp(buf, "root", 4) && strncmp(buf, "PK", 2)) { // PK is zip file
251  Error("TWebFile", "%s is not a ROOT file", fBasicUrl.Data());
252  MakeZombie();
253  gDirectory = gROOT;
254  return;
255  }
256  }
257 
258  TFile::Init(kFALSE);
259  fD = -2; // so TFile::IsOpen() will return true when in TFile::~TFile
260 }
261 
262 ////////////////////////////////////////////////////////////////////////////////
263 /// Set GET command for use by ReadBuffer(s)10(), handle redirection if
264 /// needed. Give full URL so Apache's virtual hosts solution works.
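//
// For illustration (editorial sketch; host, port, credentials and byte range
// are placeholders), the request template assembled here looks roughly like:
//
//    GET http://host.dom.ain:80/file.root HTTP/1.1\r\n
//    Host: host.dom.ain\r\n
//    Authorization: Basic <base64(user:passwd)>\r\n     (only if credentials are given)
//    User-Agent: ROOT-TWebFile/1.1\r\n
//    Range: bytes=
//
// The actual byte range is appended later by ReadBuffer10()/ReadBuffers10().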
265 
266 void TWebFile::SetMsgReadBuffer10(const char *redirectLocation, Bool_t tempRedirect)
267 {
268  TUrl oldUrl;
269  TString oldBasicUrl;
270 
271  if (redirectLocation) {
272  if (tempRedirect) { // temp redirect
273  fUrlOrg = fUrl;
274  fBasicUrlOrg = fBasicUrl;
275  } else { // permanent redirect
276  fUrlOrg = "";
277  fBasicUrlOrg = "";
278  }
279 
280  oldUrl = fUrl;
281  oldBasicUrl = fBasicUrl;
282 
283  fUrl.SetUrl(redirectLocation);
284  fBasicUrl = fUrl.GetProtocol();
285  fBasicUrl += "://";
286  fBasicUrl += fUrl.GetHost();
287  fBasicUrl += ":";
288  fBasicUrl += fUrl.GetPort();
289  fBasicUrl += "/";
290  fBasicUrl += fUrl.GetFile();
291  // add query string again
292  TString rdl(redirectLocation);
293  if (rdl.Index("?") >= 0) {
294  rdl = rdl(rdl.Index("?"), rdl.Length());
295  fBasicUrl += rdl;
296  }
297  }
298 
299  if (fMsgReadBuffer10 != "") {
300  // patch up existing command
301  if (oldBasicUrl != "") {
302  // change to redirection location
303  fMsgReadBuffer10.ReplaceAll(oldBasicUrl, fBasicUrl);
304  fMsgReadBuffer10.ReplaceAll(TString("Host: ")+oldUrl.GetHost(), TString("Host: ")+fUrl.GetHost());
305  } else if (fBasicUrlOrg != "") {
306  // change back from temp redirection location
307  fMsgReadBuffer10.ReplaceAll(fBasicUrl, fBasicUrlOrg);
308  fMsgReadBuffer10.ReplaceAll(TString("Host: ")+fUrl.GetHost(), TString("Host: ")+fUrlOrg.GetHost());
309  fUrl = fUrlOrg;
310  fBasicUrl = fBasicUrlOrg;
311  fUrlOrg = "";
312  fBasicUrlOrg = "";
313  }
314  }
315 
316  if (fBasicUrl == "") {
317  fBasicUrl += fUrl.GetProtocol();
318  fBasicUrl += "://";
319  fBasicUrl += fUrl.GetHost();
320  fBasicUrl += ":";
321  fBasicUrl += fUrl.GetPort();
322  fBasicUrl += "/";
323  fBasicUrl += fUrl.GetFile();
324  fBasicUrl += "?";
325  fBasicUrl += fUrl.GetOptions();
326  }
327 
328  if (fMsgReadBuffer10 == "") {
329  fMsgReadBuffer10 = "GET ";
330  fMsgReadBuffer10 += fBasicUrl;
331  if (fHTTP11)
332  fMsgReadBuffer10 += " HTTP/1.1";
333  else
334  fMsgReadBuffer10 += " HTTP/1.0";
335  fMsgReadBuffer10 += "\r\n";
336  if (fHTTP11) {
337  fMsgReadBuffer10 += "Host: ";
338  fMsgReadBuffer10 += fUrl.GetHost();
339  fMsgReadBuffer10 += "\r\n";
340  }
341  fMsgReadBuffer10 += BasicAuthentication();
342  fMsgReadBuffer10 += gUserAgent;
343  fMsgReadBuffer10 += "\r\n";
344  fMsgReadBuffer10 += "Range: bytes=";
345  }
346 }
347 
348 ////////////////////////////////////////////////////////////////////////////////
349 /// Check if shell var "http_proxy" has been set and should be used.
350 
351 void TWebFile::CheckProxy()
352 {
353  if (fNoProxy)
354  return;
355 
356  if (fgProxy.IsValid()) {
357  fProxy = fgProxy;
358  return;
359  }
360 
361  TString proxy = gSystem->Getenv("http_proxy");
362  if (proxy != "") {
363  TUrl p(proxy);
364  if (strcmp(p.GetProtocol(), "http")) {
365  Error("CheckProxy", "protocol must be HTTP in proxy URL %s",
366  proxy.Data());
367  return;
368  }
369  fProxy = p;
370  if (gDebug > 0)
371  Info("CheckProxy", "using HTTP proxy %s", fProxy.GetUrl());
372  }
373 }
374 
375 ////////////////////////////////////////////////////////////////////////////////
376 /// A TWebFile that has been correctly constructed is always considered open.
377 
378 Bool_t TWebFile::IsOpen() const
379 {
380  return IsZombie() ? kFALSE : kTRUE;
381 }
382 
383 ////////////////////////////////////////////////////////////////////////////////
384 /// Reopen a file with a different access mode, like from READ to
385 /// UPDATE or from NEW, CREATE, RECREATE, UPDATE to READ. Thus the
386 /// mode argument can be either "READ" or "UPDATE". The method returns
387 /// 0 in case the mode was successfully modified, 1 in case the mode
388 /// did not change (was already as requested or wrong input arguments)
389 /// and -1 in case of failure, in which case the file cannot be used
390 /// anymore. A TWebFile cannot be reopened in update mode.
391 
392 Int_t TWebFile::ReOpen(Option_t *mode)
393 {
394  TString opt = mode;
395  opt.ToUpper();
396 
397  if (opt != "READ" && opt != "UPDATE")
398  Error("ReOpen", "mode must be either READ or UPDATE, not %s", opt.Data());
399 
400  if (opt == "UPDATE")
401  Error("ReOpen", "update mode not allowed for a TWebFile");
402 
403  return 1;
404 }
405 
406 ////////////////////////////////////////////////////////////////////////////////
407 /// Read specified byte range from remote file via HTTP daemon. This
408 /// routine connects to the remote host, sends the request and returns
409 /// the buffer. Returns kTRUE in case of error.
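//
// For illustration (editorial sketch; URL and numbers are placeholders), the
// request sent to a mod_root-enabled server below has the form
//
//    GET http://host.dom.ain:80/file.root?0:300\r\n
//
// i.e. "offset:length" appended to the full URL as a bare HTTP/0.9-style line.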
410 
411 Bool_t TWebFile::ReadBuffer(char *buf, Int_t len)
412 {
413  Int_t st;
414  if ((st = ReadBufferViaCache(buf, len))) {
415  if (st == 2)
416  return kTRUE;
417  return kFALSE;
418  }
419 
420  if (!fHasModRoot)
421  return ReadBuffer10(buf, len);
422 
423  // Give full URL so Apache's virtual hosts solution works.
424  // Use protocol 0.9 for efficiency; we are not interested in the 1.0 headers.
425  if (fMsgReadBuffer == "") {
426  fMsgReadBuffer = "GET ";
427  fMsgReadBuffer += fBasicUrl;
428  fMsgReadBuffer += "?";
429  }
430  TString msg = fMsgReadBuffer;
431  msg += fOffset;
432  msg += ":";
433  msg += len;
434  msg += "\r\n";
435 
436  if (GetFromWeb(buf, len, msg) == -1)
437  return kTRUE;
438 
439  fOffset += len;
440 
441  return kFALSE;
442 }
443 
444 ////////////////////////////////////////////////////////////////////////////////
445 /// Read specified byte range from remote file via HTTP daemon. This
446 /// routine connects to the remote host, sends the request and returns
447 /// the buffer. Returns kTRUE in case of error.
448 
449 Bool_t TWebFile::ReadBuffer(char *buf, Long64_t pos, Int_t len)
450 {
451  SetOffset(pos);
452  return ReadBuffer(buf, len);
453 }
454 
455 ////////////////////////////////////////////////////////////////////////////////
456 /// Read specified byte range from remote file via HTTP 1.0 daemon (without
457 /// mod-root installed). This routine connects to the remote host, sends the
458 /// request and returns the buffer. Returns kTRUE in case of error.
459 
460 Bool_t TWebFile::ReadBuffer10(char *buf, Int_t len)
461 {
462  SetMsgReadBuffer10();
463 
464  TString msg = fMsgReadBuffer10;
465  msg += fOffset;
466  msg += "-";
467  msg += fOffset+len-1;
468  msg += "\r\n\r\n";
469 
470  Long64_t apos = fOffset - fArchiveOffset;
471 
472  // in case the server does not support segments, give it a chance to recover
473  Int_t n = GetFromWeb10(buf, len, msg, 1, &apos, &len);
474  if (n == -1)
475  return kTRUE;
476  // The -2 error condition typically only happens when GetHead() failed
477  // because HEAD is not implemented by the server; it occurs in the first
478  // call to ReadBuffer() from Init() and is not checked in ReadBuffers10().
479  if (n == -2) {
480  Error("ReadBuffer10", "%s does not exist", fBasicUrl.Data());
481  MakeZombie();
482  gDirectory = gROOT;
483  return kTRUE;
484  }
485 
486  fOffset += len;
487 
488  return kFALSE;
489 }
490 
491 ////////////////////////////////////////////////////////////////////////////////
492 /// Read specified byte ranges from remote file via HTTP daemon.
493 /// Reads the nbuf blocks described in arrays pos and len,
494 /// where pos[i] is the seek position of block i of length len[i].
495 /// Note that for nbuf=1, this call is equivalent to TFile::ReadBuffer().
496 /// This function is overloaded by TNetFile, TWebFile, etc.
497 /// Returns kTRUE in case of failure.
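//
// Example (editorial sketch, not part of the original source; positions,
// lengths and the file pointer are hypothetical): read two blocks with one
// vectored request into a single contiguous buffer:
//
//    Long64_t pos[2] = { 0, 4096 };
//    Int_t    len[2] = { 256, 512 };
//    char     buf[768];
//    Bool_t   failed = webfile->ReadBuffers(buf, pos, len, 2);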
498 
499 Bool_t TWebFile::ReadBuffers(char *buf, Long64_t *pos, Int_t *len, Int_t nbuf)
500 {
501  if (!fHasModRoot)
502  return ReadBuffers10(buf, pos, len, nbuf);
503 
504  // Give full URL so Apache's virtual hosts solution works.
505  // Use protocol 0.9 for efficiency; we are not interested in the 1.0 headers.
506  if (fMsgReadBuffer == "") {
507  fMsgReadBuffer = "GET ";
508  fMsgReadBuffer += fBasicUrl;
509  fMsgReadBuffer += "?";
510  }
511  TString msg = fMsgReadBuffer;
512 
513  Int_t k = 0, n = 0, cnt = 0;
514  for (Int_t i = 0; i < nbuf; i++) {
515  if (n) msg += ",";
516  msg += pos[i] + fArchiveOffset;
517  msg += ":";
518  msg += len[i];
519  n += len[i];
520  cnt++;
521  if ((msg.Length() > 8000) || (cnt >= 200)) {
522  msg += "\r\n";
523  if (GetFromWeb(&buf[k], n, msg) == -1)
524  return kTRUE;
525  msg = fMsgReadBuffer;
526  k += n;
527  n = 0;
528  cnt = 0;
529  }
530  }
531 
532  msg += "\r\n";
533 
534  if (GetFromWeb(&buf[k], n, msg) == -1)
535  return kTRUE;
536 
537  return kFALSE;
538 }
539 
540 ////////////////////////////////////////////////////////////////////////////////
541 /// Read specified byte ranges from remote file via HTTP 1.0 daemon (without
542 /// mod-root installed). Read the nbuf blocks described in arrays pos and len,
543 /// where pos[i] is the seek position of block i of length len[i].
544 /// Note that for nbuf=1, this call is equivalent to TFile::ReadBuffer().
545 /// This function is overloaded by TNetFile, TWebFile, etc.
546 /// Returns kTRUE in case of failure.
547 
548 Bool_t TWebFile::ReadBuffers10(char *buf, Long64_t *pos, Int_t *len, Int_t nbuf)
549 {
550  SetMsgReadBuffer10();
551 
552  TString msg = fMsgReadBuffer10;
553 
554  Int_t k = 0, n = 0, r, cnt = 0;
555  for (Int_t i = 0; i < nbuf; i++) {
556  if (n) msg += ",";
557  msg += pos[i] + fArchiveOffset;
558  msg += "-";
559  msg += pos[i] + fArchiveOffset + len[i] - 1;
560  n += len[i];
561  cnt++;
562  if ((msg.Length() > 8000) || (cnt >= 200) || (i+1 == nbuf)) {
563  msg += "\r\n\r\n";
564  r = GetFromWeb10(&buf[k], n, msg, cnt, pos + (i+1-cnt), len + (i+1-cnt));
565  if (r == -1)
566  return kTRUE;
567  msg = fMsgReadBuffer10;
568  k += n;
569  n = 0;
570  cnt = 0;
571  }
572  }
573 
574  return kFALSE;
575 }
576 
577 ////////////////////////////////////////////////////////////////////////////////
578 /// Extract requested segments from the cached content.
579 /// Such a cache can be produced when the server unexpectedly returns the full data instead of the requested segments.
580 /// Returns -1 in case of error, 0 in case of success.
581 
582 Int_t TWebFile::GetFromCache(char *buf, Int_t len, Int_t nseg, Long64_t *seg_pos, Int_t *seg_len)
583 {
584  if (!fFullCache) return -1;
585 
586  if (gDebug > 0)
587  Info("GetFromCache", "Extract %d segments total len %d from cached data", nseg, len);
588 
589  Int_t curr = 0;
590  for (Int_t cnt=0;cnt<nseg;cnt++) {
591  // check that target buffer has enough space
592  if (curr + seg_len[cnt] > len) return -1;
593  // check that segment is inside cached area
594  if (fArchiveOffset + seg_pos[cnt] + seg_len[cnt] > fFullCacheSize) return -1;
595  char* src = (char*) fFullCache + fArchiveOffset + seg_pos[cnt];
596  memcpy(buf + curr, src, seg_len[cnt]);
597  curr += seg_len[cnt];
598  }
599 
600  return 0;
601 }
602 
603 ////////////////////////////////////////////////////////////////////////////////
604 /// Read request from web server. Returns -1 in case of error,
605 /// 0 in case of success.
606 
607 Int_t TWebFile::GetFromWeb(char *buf, Int_t len, const TString &msg)
608 {
609  TSocket *s;
610 
611  if (!len) return 0;
612 
613  Double_t start = 0;
614  if (gPerfStats) start = TTimeStamp();
615 
616  TUrl connurl;
617  if (fProxy.IsValid())
618  connurl = fProxy;
619  else
620  connurl = fUrl;
621 
622  if (strcmp(connurl.GetProtocol(), "https") == 0) {
623 #ifdef R__SSL
624  s = new TSSLSocket(connurl.GetHost(), connurl.GetPort());
625 #else
626  Error("GetFromWeb", "library compiled without SSL, https not supported");
627  return -1;
628 #endif
629  } else
630  s = new TSocket(connurl.GetHost(), connurl.GetPort());
631 
632  if (!s->IsValid()) {
633  Error("GetFromWeb", "cannot connect to host %s", fUrl.GetHost());
634  delete s;
635  return -1;
636  }
637 
638  if (s->SendRaw(msg.Data(), msg.Length()) == -1) {
639  Error("GetFromWeb", "error sending command to host %s", fUrl.GetHost());
640  delete s;
641  return -1;
642  }
643 
644  if (s->RecvRaw(buf, len) == -1) {
645  Error("GetFromWeb", "error receiving data from host %s", fUrl.GetHost());
646  delete s;
647  return -1;
648  }
649 
650  // collect statistics
651  fBytesRead += len;
652  fReadCalls++;
653 #ifdef R__WIN32
654  SetFileBytesRead(GetFileBytesRead() + len);
655  SetFileReadCalls(GetFileReadCalls() + 1);
656 #else
657  fgBytesRead += len;
658  fgReadCalls++;
659 #endif
660 
661  if (gPerfStats)
662  gPerfStats->FileReadEvent(this, len, start);
663 
664  delete s;
665  return 0;
666 }
667 
668 ////////////////////////////////////////////////////////////////////////////////
669 /// Read a multiple byte-range request from the web server.
670 /// Uses an HTTP 1.0 daemon without mod-root.
671 /// Returns -2 in case file does not exist, -1 in case
672 /// of error and 0 in case of success.
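//
// For reference (editorial sketch of the kind of multipart response this
// parser handles; the boundary string and the numbers are placeholders):
//
//    HTTP/1.1 206 Partial Content\r\n
//    Content-Type: multipart/byteranges; boundary=SEPARATOR\r\n
//    \r\n
//    --SEPARATOR\r\n
//    Content-Range: bytes 0-255/10000\r\n
//    \r\n
//    <256 bytes of data>\r\n
//    --SEPARATOR--\r\n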
673 
674 Int_t TWebFile::GetFromWeb10(char *buf, Int_t len, const TString &msg, Int_t nseg, Long64_t *seg_pos, Int_t *seg_len)
675 {
676  if (!len) return 0;
677 
678  // if file content was cached, reuse it
679  if (fFullCache && (nseg>0))
680  return GetFromCache(buf, len, nseg, seg_pos, seg_len);
681 
682  Double_t start = 0;
683  if (gPerfStats) start = TTimeStamp();
684 
685  // open fSocket and close it when going out of scope
686  TWebSocket ws(this);
687 
688  if (!fSocket || !fSocket->IsValid()) {
689  Error("GetFromWeb10", "cannot connect to host %s", fUrl.GetHost());
690  return -1;
691  }
692 
693  if (gDebug > 0)
694  Info("GetFromWeb10", "sending HTTP request:\n%s", msg.Data());
695 
696  if (fSocket->SendRaw(msg.Data(), msg.Length()) == -1) {
697  Error("GetFromWeb10", "error sending command to host %s", fUrl.GetHost());
698  return -1;
699  }
700 
701  char line[8192];
702  Int_t n, ret = 0, nranges = 0, ltot = 0, redirect = 0;
703  TString boundary, boundaryEnd;
704  Long64_t first = -1, last = -1, tot, fullsize = 0;
705  TString redir;
706 
707  while ((n = GetLine(fSocket, line, sizeof(line))) >= 0) {
708  if (n == 0) {
709  if (ret < 0)
710  return ret;
711  if (redirect) {
712  if (redir.IsNull()) {
713  // Some sites (s3.amazonaws.com) do not return a Location field on 301
714  Error("GetFromWeb10", "error - redirect without location from host %s", fUrl.GetHost());
715  return -1;
716  }
717 
718  ws.ReOpen();
719  // set message to reflect the redirectLocation and add bytes field
720  TString msg_1 = fMsgReadBuffer10;
721  msg_1 += fOffset;
722  msg_1 += "-";
723  msg_1 += fOffset+len-1;
724  msg_1 += "\r\n\r\n";
725  return GetFromWeb10(buf, len, msg_1);
726  }
727 
728  if (first >= 0) {
729  Int_t ll = Int_t(last - first) + 1;
730  Int_t rsize;
731  if ((rsize = fSocket->RecvRaw(&buf[ltot], ll)) == -1) {
732  Error("GetFromWeb10", "error receiving data from host %s", fUrl.GetHost());
733  return -1;
734  }
735  else if (ll != rsize) {
736  Error("GetFromWeb10", "expected %d bytes, got %d", ll, rsize);
737  return -1;
738  }
739  ltot += ll;
740 
741  first = -1;
742 
743  if (boundary == "")
744  break; // not a multipart response
745  }
746 
747  if (fullsize > 0) {
748 
749  if (nseg <= 0) {
750  Error("GetFromWeb10","Need segments data to extract parts from full size %lld", fullsize);
751  return -1;
752  }
753 
754  if (len > fullsize) {
755  Error("GetFromWeb10","Requested part %d longer than full size %lld", len, fullsize);
756  return -1;
757  }
758 
759  if ((fFullCache == 0) && (fullsize <= GetMaxFullCacheSize())) {
760  // try to read the full file content into the cache and then reuse it; the cache size is limited by GetMaxFullCacheSize()
761  fFullCache = malloc(fullsize);
762  if (fFullCache != 0) {
763  if (fSocket->RecvRaw(fFullCache, fullsize) != fullsize) {
764  Error("GetFromWeb10", "error receiving data from host %s", fUrl.GetHost());
765  free(fFullCache); fFullCache = 0;
766  return -1;
767  }
768  fFullCacheSize = fullsize;
769  return GetFromCache(buf, len, nseg, seg_pos, seg_len);
770  }
771  // when cache allocation failed, try without cache
772  }
773 
774  // check that all segments are inside the range and in sorted order
775  for (Int_t cnt=0;cnt<nseg;cnt++) {
776  if (fArchiveOffset + seg_pos[cnt] + seg_len[cnt] > fullsize) {
777  Error("GetFromWeb10","Requested segment %lld len %d is outside of full range %lld", seg_pos[cnt], seg_len[cnt], fullsize);
778  return -1;
779  }
780  if ((cnt>0) && (seg_pos[cnt-1] + seg_len[cnt-1] > seg_pos[cnt])) {
781  Error("GetFromWeb10","Requested segments are not in sorted order");
782  return -1;
783  }
784  }
785 
786  Long64_t pos = 0;
787  char* curr = buf;
788  char dbuf[2048]; // dummy buffer used to skip data
789 
790  // now read complete file and take only requested segments into the buffer
791  for (Int_t cnt=0; cnt<nseg; cnt++) {
792  // first skip data before segment
793  while (pos < fArchiveOffset + seg_pos[cnt]) {
794  Long64_t ll = fArchiveOffset + seg_pos[cnt] - pos;
795  if (ll > Int_t(sizeof(dbuf))) ll = sizeof(dbuf);
796  if (fSocket->RecvRaw(dbuf, ll) != ll) {
797  Error("GetFromWeb10", "error receiving data from host %s", fUrl.GetHost());
798  return -1;
799  }
800  pos += ll;
801  }
802 
803  // reading segment itself
804  if (fSocket->RecvRaw(curr, seg_len[cnt]) != seg_len[cnt]) {
805  Error("GetFromWeb10", "error receiving data from host %s", fUrl.GetHost());
806  return -1;
807  }
808  curr += seg_len[cnt];
809  pos += seg_len[cnt];
810  ltot += seg_len[cnt];
811  }
812 
813  // now read file to the end
814  while (pos < fullsize) {
815  Long64_t ll = fullsize - pos;
816  if (ll > Int_t(sizeof(dbuf))) ll = sizeof(dbuf);
817  if (fSocket->RecvRaw(dbuf, ll) != ll) {
818  Error("GetFromWeb10", "error receiving data from host %s", fUrl.GetHost());
819  return -1;
820  }
821  pos += ll;
822  }
823 
824  if (gDebug>0) Info("GetFromWeb10","Complete reading %d bytes in %d segments out of full size %lld", len, nseg, fullsize);
825 
826  break;
827  }
828 
829  continue;
830  }
831 
832  if (gDebug > 0)
833  Info("GetFromWeb10", "header: %s", line);
834 
835  if (boundaryEnd == line) {
836  if (gDebug > 0)
837  Info("GetFromWeb10", "got all headers");
838  break;
839  }
840  if (boundary == line) {
841  nranges++;
842  if (gDebug > 0)
843  Info("GetFromWeb10", "get new multipart byte range (%d)", nranges);
844  }
845 
846  TString res = line;
847 
848  if (res.BeginsWith("HTTP/1.")) {
849  if (res.BeginsWith("HTTP/1.1")) {
850  if (!fHTTP11)
851  fMsgReadBuffer10 = "";
852  fHTTP11 = kTRUE;
853  }
854  TString scode = res(9, 3);
855  Int_t code = scode.Atoi();
856  if (code >= 500) {
857  ret = -1;
858  TString mess = res(13, 1000);
859  Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
860  } else if (code >= 400) {
861  if (code == 404)
862  ret = -2; // file does not exist
863  else {
864  ret = -1;
865  TString mess = res(13, 1000);
866  Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
867  }
868  } else if (code >= 300) {
869  if (code == 301 || code == 303) {
870  redirect = 1; // permanent redirect
871  } else if (code == 302 || code == 307) {
872  // treat 302 as 303: permanent redirect
873  redirect = 1;
874  //redirect = 2; // temp redirect
875  } else {
876  ret = -1;
877  TString mess = res(13, 1000);
878  Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
879  }
880  } else if (code > 200) {
881  if (code != 206) {
882  ret = -1;
883  TString mess = res(13, 1000);
884  Error("GetFromWeb10", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
885  }
886  } else if (code == 200) {
887  fullsize = -200; // make indication of code 200
888  Warning("GetFromWeb10",
889  "Server %s responded with the complete file, but only part of it was requested.\n"
890  "Check MaxRanges configuration parameter (if Apache is used)",
891  fUrl.GetHost());
892 
893  }
894  } else if (res.BeginsWith("Content-Type: multipart")) {
895  boundary = res(res.Index("boundary=")+9, 1000);
896  if (boundary[0]=='"' && boundary[boundary.Length()-1]=='"') {
897  boundary = boundary(1,boundary.Length()-2);
898  }
899  boundary = "--" + boundary;
900  boundaryEnd = boundary + "--";
901  } else if (res.BeginsWith("Content-range:")) {
902 #ifdef R__WIN32
903  sscanf(res.Data(), "Content-range: bytes %I64d-%I64d/%I64d", &first, &last, &tot);
904 #else
905  sscanf(res.Data(), "Content-range: bytes %lld-%lld/%lld", &first, &last, &tot);
906 #endif
907  if (fSize == -1) fSize = tot;
908  } else if (res.BeginsWith("Content-Range:")) {
909 #ifdef R__WIN32
910  sscanf(res.Data(), "Content-Range: bytes %I64d-%I64d/%I64d", &first, &last, &tot);
911 #else
912  sscanf(res.Data(), "Content-Range: bytes %lld-%lld/%lld", &first, &last, &tot);
913 #endif
914  if (fSize == -1) fSize = tot;
915  } else if (res.BeginsWith("Content-Length:") && (fullsize == -200)) {
916 #ifdef R__WIN32
917  sscanf(res.Data(), "Content-Length: %I64d", &fullsize);
918 #else
919  sscanf(res.Data(), "Content-Length: %lld", &fullsize);
920 #endif
921  } else if (res.BeginsWith("Location:") && redirect) {
922  redir = res(10, 1000);
923  if (redirect == 2) // temp redirect
924  SetMsgReadBuffer10(redir, kTRUE);
925  else // permanent redirect
926  SetMsgReadBuffer10(redir, kFALSE);
927  }
928  }
929 
930  if (redirect && redir.IsNull()) {
931  ret = -1;
932  Error("GetFromWeb10", "error - redirect without location from host %s", fUrl.GetHost());
933  }
934 
935  if (n == -1 && fHTTP11) {
936  if (gDebug > 0)
937  Info("GetFromWeb10", "HTTP/1.1 socket closed, reopen");
938  if (fBasicUrlOrg != "") {
939  // if we have to close temp redirection, set back to original url
940  SetMsgReadBuffer10();
941  }
942  ws.ReOpen();
943  return GetFromWeb10(buf, len, msg);
944  }
945 
946  if (ltot != len) {
947  Error("GetFromWeb10", "error receiving expected amount of data (got %d, expected %d) from host %s",
948  ltot, len, fUrl.GetHost());
949  return -1;
950  }
951 
952  // collect statistics
953  fBytesRead += len;
954  fReadCalls++;
955 #ifdef R__WIN32
956  SetFileBytesRead(GetFileBytesRead() + len);
957  SetFileReadCalls(GetFileReadCalls() + 1);
958 #else
959  fgBytesRead += len;
960  fgReadCalls++;
961 #endif
962 
963  if (gPerfStats)
964  gPerfStats->FileReadEvent(this, len, start);
965 
966  return 0;
967 }
968 
969 ////////////////////////////////////////////////////////////////////////////////
970 /// Set position from where to start reading.
971 
972 void TWebFile::Seek(Long64_t offset, ERelativeTo pos)
973 {
974  switch (pos) {
975  case kBeg:
976  fOffset = offset + fArchiveOffset;
977  break;
978  case kCur:
979  fOffset += offset;
980  break;
981  case kEnd:
982  // this option is not used currently in the ROOT code
983  if (fArchiveOffset)
984  Error("Seek", "seeking from end in archive is not (yet) supported");
985  fOffset = fEND - offset; // is fEND really EOF or logical EOF?
986  break;
987  }
988 }
989 
990 ////////////////////////////////////////////////////////////////////////////////
991 /// Return maximum file size.
992 
993 Long64_t TWebFile::GetSize() const
994 {
995  if (!fHasModRoot || fSize >= 0)
996  return fSize;
997 
998  Long64_t size;
999  char asize[64];
1000 
1001  TString msg = "GET ";
1002  msg += fBasicUrl;
1003  msg += "?";
1004  msg += -1;
1005  msg += "\r\n";
1006 
1007  if (const_cast<TWebFile*>(this)->GetFromWeb(asize, 64, msg) == -1)
1008  return kMaxInt;
1009 
1010 #ifndef R__WIN32
1011  size = atoll(asize);
1012 #else
1013  size = _atoi64(asize);
1014 #endif
1015 
1016  fSize = size;
1017 
1018  return size;
1019 }
1020 
1021 ////////////////////////////////////////////////////////////////////////////////
1022 /// Get the HTTP header. Depending on the return code we can see if
1023 /// the file exists and if the server uses mod_root.
1024 /// Returns -1 in case of an error, -2 in case the file does not exist,
1025 /// -3 in case HEAD is not supported (dCache HTTP door) and
1026 /// 0 in case of success.
1027 
1028 Int_t TWebFile::GetHead()
1029 {
1030  // Give full URL so Apache's virtual hosts solution works.
1031  if (fMsgGetHead == "") {
1032  fMsgGetHead = "HEAD ";
1033  fMsgGetHead += fBasicUrl;
1034  if (fHTTP11)
1035  fMsgGetHead += " HTTP/1.1";
1036  else
1037  fMsgGetHead += " HTTP/1.0";
1038  fMsgGetHead += "\r\n";
1039  if (fHTTP11) {
1040  fMsgGetHead += "Host: ";
1041  fMsgGetHead += fUrl.GetHost();
1042  fMsgGetHead += "\r\n";
1043  }
1044  fMsgGetHead += BasicAuthentication();
1045  fMsgGetHead += gUserAgent;
1046  fMsgGetHead += "\r\n\r\n";
1047  }
1048  TString msg = fMsgGetHead;
1049 
1050  TUrl connurl;
1051  if (fProxy.IsValid())
1052  connurl = fProxy;
1053  else
1054  connurl = fUrl;
1055 
1056  TSocket *s = 0;
1057  for (Int_t i = 0; i < 5; i++) {
1058  if (strcmp(connurl.GetProtocol(), "https") == 0) {
1059 #ifdef R__SSL
1060  s = new TSSLSocket(connurl.GetHost(), connurl.GetPort());
1061 #else
1062  Error("GetHead", "library compiled without SSL, https not supported");
1063  return -1;
1064 #endif
1065  } else
1066  s = new TSocket(connurl.GetHost(), connurl.GetPort());
1067 
1068  if (!s->IsValid()) {
1069  delete s;
1070  if (gSystem->GetErrno() == EADDRINUSE || gSystem->GetErrno() == EISCONN) {
1071  s = 0;
1072  gSystem->Sleep(i*10);
1073  } else {
1074  Error("GetHead", "cannot connect to host %s (errno=%d)", fUrl.GetHost(),
1075  gSystem->GetErrno());
1076  return -1;
1077  }
1078  } else
1079  break;
1080  }
1081  if (!s)
1082  return -1;
1083 
1084  if (gDebug > 0) {
1085  Info("GetHead", "connected to host %s", connurl.GetHost());
1086  Info("GetHead", "sending HTTP request:\n%s", msg.Data());
1087  }
1088 
1089  if (s->SendRaw(msg.Data(), msg.Length()) == -1) {
1090  Error("GetHead", "error sending command to host %s", fUrl.GetHost());
1091  delete s;
1092  return -1;
1093  }
1094 
1095  char line[8192];
1096  Int_t n, ret = 0, redirect = 0;
1097  TString redir;
1098 
1099  while ((n = GetLine(s, line, sizeof(line))) >= 0) {
1100  if (n == 0) {
1101  if (gDebug > 0)
1102  Info("GetHead", "got all headers");
1103  delete s;
1104  if (fBasicUrlOrg != "" && !redirect) {
1105  // set back to original url in case of temp redirect
1106  SetMsgReadBuffer10();
1107  fMsgGetHead = "";
1108  }
1109  if (ret < 0)
1110  return ret;
1111  if (redirect) {
1112  if (redir.IsNull()) {
1113  // Some sites (s3.amazonaws.com) do not return a Location field on 301
1114  Error("GetHead", "error - redirect without location from host %s", fUrl.GetHost());
1115  return -1;
1116  }
1117  return GetHead();
1118  }
1119  return 0;
1120  }
1121 
1122  if (gDebug > 0)
1123  Info("GetHead", "header: %s", line);
1124 
1125  TString res = line;
1126  ProcessHttpHeader(res);
1127  if (res.BeginsWith("HTTP/1.")) {
1128  if (res.BeginsWith("HTTP/1.1")) {
1129  if (!fHTTP11) {
1130  fMsgGetHead = "";
1131  fMsgReadBuffer10 = "";
1132  }
1133  fHTTP11 = kTRUE;
1134  }
1135  TString scode = res(9, 3);
1136  Int_t code = scode.Atoi();
1137  if (code >= 500) {
1138  if (code == 500)
1139  fHasModRoot = kTRUE;
1140  else {
1141  ret = -1;
1142  TString mess = res(13, 1000);
1143  Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
1144  }
1145  } else if (code >= 400) {
1146  if (code == 400)
1147  ret = -3; // command not supported
1148  else if (code == 404)
1149  ret = -2; // file does not exist
1150  else {
1151  ret = -1;
1152  TString mess = res(13, 1000);
1153  Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
1154  }
1155  } else if (code >= 300) {
1156  if (code == 301 || code == 303)
1157  redirect = 1; // permanent redirect
1158  else if (code == 302 || code == 307)
1159  redirect = 2; // temp redirect
1160  else {
1161  ret = -1;
1162  TString mess = res(13, 1000);
1163  Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
1164  }
1165  } else if (code > 200) {
1166  ret = -1;
1167  TString mess = res(13, 1000);
1168  Error("GetHead", "%s: %s (%d)", fBasicUrl.Data(), mess.Data(), code);
1169  }
1170  } else if (res.BeginsWith("Content-Length:")) {
1171  TString slen = res(16, 1000);
1172  fSize = slen.Atoll();
1173  } else if (res.BeginsWith("Location:") && redirect) {
1174  redir = res(10, 1000);
1175  if (redirect == 2) // temp redirect
1176  SetMsgReadBuffer10(redir, kTRUE);
1177  else // permanent redirect
1178  SetMsgReadBuffer10(redir, kFALSE);
1179  fMsgGetHead = "";
1180  }
1181  }
1182 
1183  delete s;
1184 
1185  return ret;
1186 }
1187 
1188 ////////////////////////////////////////////////////////////////////////////////
1189 /// Read a line from the socket. Reads at most one less than the number of
1190 /// characters specified by maxsize. Reading stops when a newline character
1191 /// is found. The newline (\n) and cr (\r), if any, are removed.
1192 /// Returns -1 in case of error, or the number of characters read (>= 0)
1193 /// otherwise.
1194 
1195 Int_t TWebFile::GetLine(TSocket *s, char *line, Int_t maxsize)
1196 {
1197  Int_t n = GetHunk(s, line, maxsize);
1198  if (n < 0) {
1199  if (!fHTTP11 || gDebug > 0)
1200  Error("GetLine", "error receiving data from host %s", fUrl.GetHost());
1201  return -1;
1202  }
1203 
1204  if (n > 0 && line[n-1] == '\n') {
1205  n--;
1206  if (n > 0 && line[n-1] == '\r')
1207  n--;
1208  line[n] = '\0';
1209  }
1210  return n;
1211 }
1212 
1213 ////////////////////////////////////////////////////////////////////////////////
1214 /// Read a hunk of data from the socket, up until a terminator. The hunk is
1215 /// limited by whatever the TERMINATOR callback chooses as its
1216 /// terminator. For example, if terminator stops at newline, the hunk
1217 /// will consist of a line of data; if terminator stops at two
1218 /// newlines, it can be used to read the head of an HTTP response.
1219 /// Upon determining the boundary, the function returns the data (up to
1220 /// the terminator) in hunk.
1221 ///
1222 /// In case of read error, -1 is returned. In case of having read some
1223 /// data, but encountering EOF before seeing the terminator, the data
1224 /// that has been read is returned, but it will (obviously) not contain the
1225 /// terminator.
1226 ///
1227 /// The TERMINATOR function is called with three arguments: the
1228 /// beginning of the data read so far, the beginning of the current
1229 /// block of peeked-at data, and the length of the current block.
1230 /// Depending on its needs, the function is free to choose whether to
1231 /// analyze all data or just the newly arrived data. If TERMINATOR
1232 /// returns 0, it means that the terminator has not been seen.
1233 /// Otherwise it should return a pointer to the character immediately
1234 /// following the terminator.
1235 ///
1236 /// The idea is to be able to read a line of input, or otherwise a hunk
1237 /// of text, such as the head of an HTTP request, without crossing the
1238 /// boundary, so that the next call to RecvRaw() etc. reads the data
1239 /// after the hunk. To achieve that, this function does the following:
1240 ///
1241 /// 1. Peek at incoming data.
1242 ///
1243 /// 2. Determine whether the peeked data, along with the previously
1244 /// read data, includes the terminator.
1245 ///
1246 /// 3a. If yes, read the data until the end of the terminator, and
1247 /// exit.
1248 ///
1249 /// 3b. If no, read the peeked data and goto 1.
1250 ///
1251 /// The function is careful to assume as little as possible about the
1252 /// implementation of peeking. For example, every peek is followed by
1253 /// a read. If the read returns a different amount of data, the
1254 /// process is retried until all data arrives safely.
1255 ///
1256 /// Reads at most one less than the number of characters specified by maxsize.
1257 
1258 Int_t TWebFile::GetHunk(TSocket *s, char *hunk, Int_t maxsize)
1259 {
1260  if (maxsize <= 0) return 0;
1261 
1262  Int_t bufsize = maxsize;
1263  Int_t tail = 0; // tail position in HUNK
1264 
1265  while (1) {
1266  const char *end;
1267  Int_t pklen, rdlen, remain;
1268 
1269  // First, peek at the available data.
1270  pklen = s->RecvRaw(hunk+tail, bufsize-1-tail, kPeek);
1271  if (pklen < 0) {
1272  return -1;
1273  }
1274  end = HttpTerminator(hunk, hunk+tail, pklen);
1275  if (end) {
1276  // The data contains the terminator: we'll drain the data up
1277  // to the end of the terminator.
1278  remain = end - (hunk + tail);
1279  if (remain == 0) {
1280  // No more data needs to be read.
1281  hunk[tail] = '\0';
1282  return tail;
1283  }
1284  if (bufsize - 1 < tail + remain) {
1285  Error("GetHunk", "hunk buffer too small for data from host %s (%d bytes needed)",
1286  fUrl.GetHost(), tail + remain + 1);
1287  hunk[tail] = '\0';
1288  return -1;
1289  }
1290  } else {
1291  // No terminator: simply read the data we know is (or should
1292  // be) available.
1293  remain = pklen;
1294  }
1295 
1296  // Now, read the data. Note that we make no assumptions about
1297  // how much data we'll get. (Some TCP stacks are notorious for
1298  // read returning less data than the previous MSG_PEEK.)
1299  rdlen = s->RecvRaw(hunk+tail, remain, kDontBlock);
1300  if (rdlen < 0) {
1301  return -1;
1302  }
1303  tail += rdlen;
1304  hunk[tail] = '\0';
1305 
1306  if (rdlen == 0) {
1307  // in case of EOF: return the data we've read.
1308  return tail;
1309  }
1310  if (end && rdlen == remain) {
1311  // The terminator was seen and the remaining data drained --
1312  // we got what we came for.
1313  return tail;
1314  }
1315 
1316  // Keep looping until all the data arrives.
1317 
1318  if (tail == bufsize - 1) {
1319  Error("GetHunk", "hunk buffer too small for data from host %s",
1320  fUrl.GetHost());
1321  return -1;
1322  }
1323  }
1324 }
1325 
1326 ////////////////////////////////////////////////////////////////////////////////
1327 /// Determine whether [START, PEEKED + PEEKLEN) contains an HTTP new
1328 /// line [\\r]\\n. If so, return the pointer to the position after the line,
1329 /// otherwise return 0. This is used as callback to GetHunk(). The data
1330 /// between START and PEEKED has been read and cannot be "unread"; the
1331 /// data after PEEKED has only been peeked.
1332 
1333 const char *TWebFile::HttpTerminator(const char *start, const char *peeked,
1334  Int_t peeklen)
1335 {
1336 #if 0
1337  const char *p, *end;
1338 
1339  // Look for "[\r]\n", and return the following position if found.
1340  // Start one char before the current to cover the possibility that
1341  // part of the terminator (e.g. "\r") arrived in the previous batch.
1342  p = peeked - start < 1 ? start : peeked - 1;
1343  end = peeked + peeklen;
1344 
1345  // Check for \r\n anywhere in [p, end-2).
1346  for (; p < end - 1; p++)
1347  if (p[0] == '\r' && p[1] == '\n')
1348  return p + 2;
1349 
1350  // p==end-1: check for \r\n directly preceding END.
1351  if (p[0] == '\r' && p[1] == '\n')
1352  return p + 2;
1353 #else
1354  if (start) { } // start unused, silence compiler
1355  const char *p = (const char*) memchr(peeked, '\n', peeklen);
1356  if (p)
1357  // p+1 because the line must include '\n'
1358  return p + 1;
1359 #endif
1360  return 0;
1361 }
1362 
1363 ////////////////////////////////////////////////////////////////////////////////
1364 /// Return basic authentication scheme, to be added to the request.
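//
// Example (editorial sketch; the credentials are placeholders): for the URL
// http://user:pass@host.dom.ain/file.root this method produces the header line
//
//    Authorization: Basic dXNlcjpwYXNz\r\n
//
// where "dXNlcjpwYXNz" is the base64 encoding of "user:pass".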
1365 
1366 TString TWebFile::BasicAuthentication()
1367 {
1368  TString msg;
1369  if (strlen(fUrl.GetUser())) {
1370  TString auth = fUrl.GetUser();
1371  if (strlen(fUrl.GetPasswd())) {
1372  auth += ":";
1373  auth += fUrl.GetPasswd();
1374  }
1375  msg += "Authorization: Basic ";
1376  msg += TBase64::Encode(auth);
1377  msg += "\r\n";
1378  }
1379  return msg;
1380 }
1381 
1382 ////////////////////////////////////////////////////////////////////////////////
1383 /// Static method setting global proxy URL.
1384 
1385 void TWebFile::SetProxy(const char *proxy)
1386 {
1387  if (proxy && *proxy) {
1388  TUrl p(proxy);
1389  if (strcmp(p.GetProtocol(), "http")) {
1390  ::Error("TWebFile::SetProxy", "protocol must be HTTP in proxy URL %s",
1391  proxy);
1392  return;
1393  }
1394  fgProxy = p;
1395  }
1396 }
1397 
1398 ////////////////////////////////////////////////////////////////////////////////
1399 /// Static method returning the global proxy URL.
1400 
1401 const char *TWebFile::GetProxy()
1402 {
1403  if (fgProxy.IsValid())
1404  return fgProxy.GetUrl();
1405  return "";
1406 }
1407 
1408 ////////////////////////////////////////////////////////////////////////////////
1409 /// Process the HTTP header in the argument. This method is intended to be
1410 /// overwritten by subclasses that exploit the information contained in the
1411 /// HTTP headers.
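//
// A subclass might override it, for example (hypothetical sketch; class name
// and header name are illustrative only):
//
//    class TMyWebFile : public TWebFile {
//    public:
//       using TWebFile::TWebFile;
//       void ProcessHttpHeader(const TString &line) override
//       {
//          if (line.BeginsWith("X-Custom-Header:")) {
//             // inspect or store the header value here
//          }
//       }
//    };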
1412 
1413 void TWebFile::ProcessHttpHeader(const TString&)
1414 {
1415 }
1416 
1417 ////////////////////////////////////////////////////////////////////////////////
1418 /// Static method returning the maximal size of the full cache
1419 /// that can be kept by a file instance.
1420 
1421 Long64_t TWebFile::GetMaxFullCacheSize()
1422 {
1423  return fgMaxFullCacheSize;
1424 }
1425 
1426 ////////////////////////////////////////////////////////////////////////////////
1427 /// Static method to set the maximal size of the full cache
1428 /// that can be kept by a file instance.
1429 
1430 void TWebFile::SetMaxFullCacheSize(Long64_t sz)
1431 {
1432  fgMaxFullCacheSize = sz;
1433 }
1434 
1435 
1436 ////////////////////////////////////////////////////////////////////////////////
1437 /// Create helper class that allows directory access via httpd.
1438 /// The name must start with '-' to bypass the TSystem singleton check.
1439 
1440 TWebSystem::TWebSystem() : TSystem("-http", "HTTP Helper System")
1441 {
1442  SetName("http");
1443 
1444  fDirp = 0;
1445 }
1446 
1447 ////////////////////////////////////////////////////////////////////////////////
1448 /// Make a directory via httpd. Not supported.
1449 
1450 Int_t TWebSystem::MakeDirectory(const char *)
1451 {
1452  return -1;
1453 }
1454 
1455 ////////////////////////////////////////////////////////////////////////////////
1456 /// Open a directory via httpd. Returns an opaque pointer to a dir
1457 /// structure. Returns 0 in case of error.
1458 
1459 void *TWebSystem::OpenDirectory(const char *)
1460 {
1461  if (fDirp) {
1462  Error("OpenDirectory", "invalid directory pointer (should never happen)");
1463  fDirp = 0;
1464  }
1465 
1466  fDirp = 0; // not implemented for the time being
1467 
1468  return fDirp;
1469 }
1470 
1471 ////////////////////////////////////////////////////////////////////////////////
1472 /// Free directory via httpd.
1473 
1474 void TWebSystem::FreeDirectory(void *dirp)
1475 {
1476  if (dirp != fDirp) {
1477  Error("FreeDirectory", "invalid directory pointer (should never happen)");
1478  return;
1479  }
1480 
1481  fDirp = 0;
1482 }
1483 
1484 ////////////////////////////////////////////////////////////////////////////////
1485 /// Get directory entry via httpd. Returns 0 in case no more entries.
1486 
1487 const char *TWebSystem::GetDirEntry(void *dirp)
1488 {
1489  if (dirp != fDirp) {
1490  Error("GetDirEntry", "invalid directory pointer (should never happen)");
1491  return 0;
1492  }
1493 
1494  return 0;
1495 }
1496 
1497 ////////////////////////////////////////////////////////////////////////////////
1498 /// Get info about a file. Info is returned in the form of a FileStat_t
1499 /// structure (see TSystem.h).
1500 /// The function returns 0 in case of success and 1 if the file could
1501 /// not be stat'ed.
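//
// Example (editorial sketch; the URL is a placeholder): stat a remote file
// directly through a TWebSystem instance:
//
//    TWebSystem ws;
//    FileStat_t st;
//    if (ws.GetPathInfo("http://host.dom.ain/files/aap.root", st) == 0)
//       printf("remote file size: %lld bytes\n", st.fSize);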
1502 
1503 Int_t TWebSystem::GetPathInfo(const char *path, FileStat_t &buf)
1504 {
1505  TWebFile *f = new TWebFile(path, "HEADONLY");
1506 
1507  if (f->fWritten == 0) {
1508 
1509  buf.fDev = 0;
1510  buf.fIno = 0;
1511  buf.fMode = 0;
1512  buf.fUid = 0;
1513  buf.fGid = 0;
1514  buf.fSize = f->GetSize();
1515  buf.fMtime = 0;
1516  buf.fIsLink = kFALSE;
1517 
1518  delete f;
1519  return 0;
1520  }
1521 
1522  delete f;
1523  return 1;
1524 }
1525 
1526 ////////////////////////////////////////////////////////////////////////////////
1527 /// Returns FALSE if one can access a file using the specified access mode.
1528 /// Mode is the same as for the Unix access(2) function.
1529 /// Attention, bizarre convention of return value!!
1530 
1531 Bool_t TWebSystem::AccessPathName(const char *path, EAccessMode)
1532 {
1533  TWebFile *f = new TWebFile(path, "HEADONLY");
1534  if (f->fWritten == 0) {
1535  delete f;
1536  return kFALSE;
1537  }
1538  delete f;
1539  return kTRUE;
1540 }
1541 
1542 ////////////////////////////////////////////////////////////////////////////////
1543 /// Unlink, i.e. remove, a file or directory. Returns 0 when successful,
1544 /// -1 in case of failure. Not supported for httpd.
1545 
1546 Int_t TWebSystem::Unlink(const char *)
1547 {
1548  return -1;
1549 }