Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RRawFile.hxx
Go to the documentation of this file.
1 // @(#)root/io:$Id$
2 // Author: Jakob Blomer
3 
4 /*************************************************************************
5  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 #ifndef ROOT_RRawFile
13 #define ROOT_RRawFile
14 
15 #include <ROOT/RStringView.hxx>
16 
17 #include <cstddef>
18 #include <cstdint>
19 #include <memory>
20 #include <string>
21 
22 namespace ROOT {
23 namespace Internal {
24 
25 /**
26  * \class RRawFile RRawFile.hxx
27  * \ingroup IO
28  *
29  * The RRawFile provides read-only access to local and remote files. Data can be read either byte-wise or line-wise.
30  * The RRawFile base class provides line-wise access and buffering for byte-wise access. Derived classes provide the
31  * low-level read operations, e.g. from a local file system or from a web server. The RRawFile is used for non-ROOT
32  * RDataSource implementations and for RNTuple.
33  *
34  * Files are addressed by URL consisting of a transport protocol part and a location, like file:///path/to/data
35  * If the transport protocol part and the :// separator are missing, the default protocol is local file. Files are
36  * opened when required (on reading, getting file size) and closed on object destruction.
37  *
38  * RRawFiles manage system resources and are therefore made non-copyable. They can be explicitly cloned though.
39  */
40 class RRawFile {
41 public:
42  /// Derived classes do not necessarily need to provide file size information but they can return "not known" instead
43  static constexpr std::uint64_t kUnknownFileSize = std::uint64_t(-1);
44  /// kAuto detects the line break from the first line, kSystem picks the system's default
45  enum class ELineBreaks { kAuto, kSystem, kUnix, kWindows };
46 
47  // Combination of flags provided by derived classes about the nature of the file
48  /// GetSize() does not return kUnknownFileSize
49  static constexpr int kFeatureHasSize = 0x01;
50  /// Map() and Unmap() are implemented
51  static constexpr int kFeatureHasMmap = 0x02;
52 
53  /// On construction, an ROptions parameter can customize the RRawFile behavior
54  struct ROptions {
55  ELineBreaks fLineBreak;
56  /**
57  * Read at least fBlockSize bytes at a time. A value of zero turns off I/O buffering. A negative value indicates
58  * that the protocol-dependent default block size should be used.
59  */
60  int fBlockSize;
61  ROptions() : fLineBreak(ELineBreaks::kAuto), fBlockSize(-1) {}
62  };
63 
64 private:
65  /// Don't change without adapting ReadAt()
66  static constexpr unsigned int kNumBlockBuffers = 2;
67  struct RBlockBuffer {
68  /// Where in the open file does fBuffer start
69  std::uint64_t fBufferOffset;
70  /// The number of currently buffered bytes in fBuffer
71  size_t fBufferSize;
72  /// Points into the I/O buffer with data from the file, not owned.
73  unsigned char *fBuffer;
74 
75  RBlockBuffer() : fBufferOffset(0), fBufferSize(0), fBuffer(nullptr) {}
76  RBlockBuffer(const RBlockBuffer &) = delete;
77  RBlockBuffer &operator=(const RBlockBuffer &) = delete;
78  ~RBlockBuffer() = default;
79 
80  /// Tries to copy up to nbytes starting at offset from fBuffer into buffer. Returns number of bytes copied.
81  size_t CopyTo(void *buffer, size_t nbytes, std::uint64_t offset);
82  };
83  /// To be used modulo kNumBlockBuffers, points to the last used block buffer in fBlockBuffers
84  unsigned int fBlockBufferIdx;
85  /// An active buffer and a shadow buffer, which supports "jumping back" to a previously used location in the file
86  RBlockBuffer fBlockBuffers[kNumBlockBuffers];
87  /// Memory block containing the block buffers consecutively
88  unsigned char *fBufferSpace;
89  /// The cached file size
90  std::uint64_t fFileSize;
91  /// Files are opened lazily and only when required; the open state is kept by this flag
92  bool fIsOpen;
93 
94 protected:
95  std::string fUrl; // NOTE(review): presumably the url given to the constructor -- confirm in the implementation
96  ROptions fOptions; // NOTE(review): presumably the options given to the constructor; OpenImpl() may adjust fBlockSize
97  /// The current position in the file, which can be changed by Seek, Read, and Readln
98  std::uint64_t fFilePos;
99 
100  /**
101  * OpenImpl() is called at most once and before any call to either ReadAtImpl or GetSizeImpl. If fOptions.fBlockSize
102  * is negative, derived classes are responsible for setting a sensible value. After a call to OpenImpl(),
103  * fOptions.fBlockSize must be larger than or equal to zero.
104  */
105  virtual void OpenImpl() = 0;
106  /**
107  * Derived classes should implement low-level reading without buffering. Short reads indicate the end of the file,
108  * therefore derived classes should return nbytes bytes if available.
109  */
110  virtual size_t ReadAtImpl(void *buffer, size_t nbytes, std::uint64_t offset) = 0;
111  /// Derived classes should return the file size or kUnknownFileSize
112  virtual std::uint64_t GetSizeImpl() = 0;
113 
114  /// If a derived class supports mmap, the MapImpl and UnmapImpl calls are supposed to be implemented, too
115  /// The default implementation throws an error
116  virtual void *MapImpl(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
117  /// Derived classes with mmap support must be able to unmap the memory area handed out by Map()
118  virtual void UnmapImpl(void *region, size_t nbytes);
119 
120 public:
121  RRawFile(std::string_view url, ROptions options);
122  RRawFile(const RRawFile &) = delete;
123  RRawFile &operator=(const RRawFile &) = delete;
124  virtual ~RRawFile();
125 
126  /// Create a new RawFile that accesses the same resource. The file pointer is reset to zero.
127  virtual std::unique_ptr<RRawFile> Clone() const = 0;
128 
129  /// Factory method that returns a suitable concrete implementation according to the transport in the url
130  static std::unique_ptr<RRawFile> Create(std::string_view url, ROptions options = ROptions());
131  /// Returns only the file location, e.g. "server/file" for http://server/file
132  static std::string GetLocation(std::string_view url);
133  /// Returns only the transport protocol in lower case, e.g. "http" for HTTP://server/file
134  static std::string GetTransport(std::string_view url);
135 
136  /**
137  * Buffered read from a random position. Returns the actual number of bytes read.
138  * Short reads indicate the end of the file.
139  */
140  size_t ReadAt(void *buffer, size_t nbytes, std::uint64_t offset);
141  /// Read from fFilePos offset. Returns the actual number of bytes read.
142  size_t Read(void *buffer, size_t nbytes);
143  /// Change the cursor fFilePos
144  void Seek(std::uint64_t offset);
145  /// Returns the size of the file
146  std::uint64_t GetSize();
147 
148  /// Memory mapping according to POSIX standard; in particular, new mappings of the same range replace older ones.
149  /// Mappings need to be aligned at page boundaries, therefore the real offset can be smaller than the desired value.
150  /// Users become owner of the address returned by Map() and are responsible for calling Unmap() with the full length.
151  void *Map(size_t nbytes, std::uint64_t offset, std::uint64_t &mapdOffset);
152  /// Receives a pointer returned by Map() and should have nbytes set to the full length of the mapping
153  void Unmap(void *region, size_t nbytes);
154 
155  /// Derived classes shall inform the user about the supported functionality, which can possibly depend
156  /// on the file at hand
157  virtual int GetFeatures() const = 0;
158 
159  /// Read the next line starting from the current value of fFilePos. Returns false if the end of the file is reached.
160  bool Readln(std::string &line);
161 }; // class RRawFile
162 
163 } // namespace Internal
164 } // namespace ROOT
165 
166 #endif