Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RSqliteDS.hxx
Go to the documentation of this file.
1 // Author: Jakob Blomer CERN 07/2018
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2017, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #ifndef ROOT_RSQLITEDS
12 #define ROOT_RSQLITEDS
13 
14 #include "ROOT/RDataFrame.hxx"
15 #include "ROOT/RDataSource.hxx"
16 #include "ROOT/RStringView.hxx"
17 
18 #include <map>
19 #include <memory>
20 #include <mutex>
21 #include <string>
22 #include <vector>
23 
24 namespace ROOT {
25 
26 namespace RDF {
27 
28 namespace Internal {
29 // Forward declaration only: the members are defined in RSqliteDS.cxx in order not to pollute this header file with sqlite3.h
30 struct RSqliteDSDataSet;
31 }
32 
33 // clang-format off
34 /**
35 \class ROOT::RDF::RSqliteDS
36 \ingroup dataframe
37 \brief RSqliteDS is an RDF data source implementation for SQL result sets from sqlite3 files.
38 
39 The RSqliteDS is able to feed an RDataFrame with data from an SQLite SELECT query. One can use it like
40 
41  auto rdf = ROOT::RDF::MakeSqliteDataFrame("/path/to/file.sqlite", "select name from table");
42  auto h = rdf.Define("lName", "name.length()").Histo1D("lName");
43 
44 The data source has to provide column types for all the columns. Determining column types in SQLite is tricky
45 as it is dynamically typed and in principle each row can have different column types. The following heuristics
46 are used:
47 
48  - If a table column is queried as is ("SELECT colname FROM table"), the default/declared column type is taken.
49  - For expressions ("SELECT 1+1 FROM table"), the type of the first row of the result set determines the column type.
50  That can result in a column being assumed to be of type NULL even though subsequent rows actually have meaningful values.
51  The provided SELECT query can be used to avoid such ambiguities.
52 */
53 class RSqliteDS final : public ROOT::RDF::RDataSource {
54 private:
55  // clang-format off
56  /// All the types known to SQLite. Changes require changing fgTypeNames, too: the enumerators are listed in the same order as the entries of that array.
57  enum class ETypes {
58  kInteger, ///< Listed against "Long64_t" in fgTypeNames.
59  kReal, ///< Listed against "double" in fgTypeNames.
60  kText, ///< Listed against "std::string" in fgTypeNames.
61  kBlob, ///< Listed against "std::vector<unsigned char>" in fgTypeNames.
62  kNull ///< Listed against "void *" in fgTypeNames.
63  };
64  // clang-format on
65 
66  /// Used to hold a single "cell" of the SELECT query's result table. Can be changed to std::variant once available.
67  struct Value_t {
68  explicit Value_t(ETypes type); ///< Declared only; the definition is not part of this header.
69  // One storage member per ETypes enumerator follows (fInteger ... fNull); fPtr refers to one of them.
70  ETypes fType; ///< Type tag of this cell.
71  bool fIsActive; ///< Not all columns of the query are necessarily used by the RDF. Allows for skipping them.
72  Long64_t fInteger; ///< Integer storage (same C++ type as the "Long64_t" entry of fgTypeNames).
73  double fReal; ///< Floating point storage (same C++ type as the "double" entry of fgTypeNames).
74  std::string fText; ///< Text storage (same C++ type as the "std::string" entry of fgTypeNames).
75  std::vector<unsigned char> fBlob; ///< Blob storage (same C++ type as the "std::vector<unsigned char>" entry of fgTypeNames).
76  void *fNull; ///< Storage for the "void *" case; presumably always a null pointer - TODO confirm in RSqliteDS.cxx.
77  void *fPtr; ///< Points to one of the values; an address to this pointer is returned by GetColumnReadersImpl.
78  };
79  // NOTE(review): takes an error code, presumably an sqlite3 result code; the definition is not visible here - confirm.
80  void SqliteError(int errcode);
81 
82  std::unique_ptr<Internal::RSqliteDSDataSet> fDataSet; ///< Owning pointer to the forward-declared data set type, which keeps sqlite3.h out of this header.
83  unsigned int fNSlots; ///< Presumably the slot count passed to SetNSlots - TODO confirm.
84  ULong64_t fNRow; ///< Presumably a row counter / current row index - TODO confirm.
85  std::vector<std::string> fColumnNames; ///< Column names; presumably what GetColumnNames() returns - TODO confirm.
86  std::vector<ETypes> fColumnTypes; ///< Column types; presumably parallel to fColumnNames - TODO confirm.
87  /// The data source is inherently single-threaded and returns only one row at a time. This vector holds the results.
88  std::vector<Value_t> fValues;
89 
90  // clang-format off
91  /// Corresponds to the types defined in ETypes: C++ type names, in the same order as the enumerators.
92  static constexpr char const *fgTypeNames[] = {
93  "Long64_t",
94  "double",
95  "std::string",
96  "std::vector<unsigned char>",
97  "void *"
98  };
99  // clang-format on
100  // Public interface. The member functions marked final override virtual functions of the RDataSource base class.
101 public:
102  RSqliteDS(const std::string &fileName, const std::string &query); ///< Takes the sqlite file name and the query (cf. the SELECT example in the class description).
103  ~RSqliteDS(); ///< Only declared here: fDataSet is a unique_ptr to a type that is incomplete in this header, so the destructor has to be defined where the type is complete.
104  void SetNSlots(unsigned int nSlots) final;
105  const std::vector<std::string> &GetColumnNames() const final;
106  bool HasColumn(std::string_view colName) const final;
107  std::string GetTypeName(std::string_view colName) const final; ///< Presumably yields one of the fgTypeNames strings - TODO confirm.
108  std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges() final;
109  bool SetEntry(unsigned int slot, ULong64_t entry) final;
110  void Initialise() final;
111  std::string GetLabel() final;
112  // According to the note on Value_t::fPtr, the record returned below carries the address of that pointer.
113 protected:
114  Record_t GetColumnReadersImpl(std::string_view name, const std::type_info &) final;
115 };
116 /// Returns an RDataFrame for the given sqlite file name and query, e.g. MakeSqliteDataFrame("/path/to/file.sqlite", "select name from table"); presumably backed by an RSqliteDS (definition not in this header).
117 RDataFrame MakeSqliteDataFrame(std::string_view fileName, std::string_view query);
118 
119 } // namespace RDF
120 
121 } // namespace ROOT
122 
123 #endif