// RCsvDS: a final class deriving publicly from ROOT::RDF::RDataSource. Judging by the
// member names (file stream, delimiter, headers, per-type value buffers) it reads
// delimiter-separated text files; only declarations are visible here.
// NOTE(review): this listing carries leading source-line numbers on each declaration and
// shows neither access specifiers nor the closing brace of the class - it looks like an
// extraction of a rendered header. Confirm against the real file before compiling.
28 class RCsvDS final :
public ROOT::RDF::RDataSource {
// Column type tag, stored as a single char.
32 using ColType_t = char;
// Static table from type tag to a string; presumably the C++ type name reported by
// GetTypeName - confirm in the definition (not visible here).
33 static const std::map<ColType_t, std::string> fgColTypeMap;
// Stream position, initialised to 0. Presumably the offset in fStream where the data
// rows start - TODO confirm.
35 std::streampos fDataPos = 0;
// NOTE(review): the in-class default is false while the constructor's `readHeaders`
// parameter defaults to true; presumably the constructor overwrites this - confirm.
36 bool fReadHeaders =
false;
// Slot count, initialised to 0; presumably set through SetNSlots - confirm.
37 unsigned int fNSlots = 0U;
// Input file stream owned by this object.
38 std::ifstream fStream;
// Immutable after construction; presumably initialised from the constructor's
// `delimiter` argument (default ',') - confirm.
39 const char fDelimiter;
// Immutable after construction; presumably initialised from the constructor's
// `linesChunkSize` argument (default -1LL). The meaning of a negative value is not
// visible in this listing - TODO confirm.
40 const Long64_t fLinesChunkSize;
// Two counters, both starting at 0. From the names: how many times entry ranges were
// requested and how many lines were consumed so far - verify against GetEntryRanges.
41 ULong64_t fEntryRangesRequested = 0ULL;
42 ULong64_t fProcessedLines = 0ULL;
// Header strings; presumably the column names returned by GetColumnNames - confirm.
43 std::vector<std::string> fHeaders;
// Type tag per string key (presumably the column name) and a list of type tags
// (presumably in column order) - confirm both in the implementation.
44 std::map<std::string, ColType_t> fColTypes;
45 std::list<ColType_t> fColTypesList;
// Two-level table of untyped pointers. Which index is the column and which is the slot
// cannot be told from this declaration - TODO confirm.
46 std::vector<std::vector<void *>> fColAddresses;
// Parsed records; Record_t is declared outside this listing.
47 std::vector<Record_t> fRecords;
// Per-type value buffers (double, Long64_t, std::string, bool). The indexing scheme
// (e.g. per column / per slot) is not visible here - TODO confirm.
48 std::vector<std::vector<double>> fDoubleEvtValues;
49 std::vector<std::vector<Long64_t>> fLong64EvtValues;
50 std::vector<std::vector<std::string>> fStringEvtValues;
// Inner container is std::deque<bool>, unlike the std::vector used for the other types.
// NOTE(review): presumably chosen to avoid the bit-packed std::vector<bool>
// specialisation, whose elements have no addressable bool - confirm intent.
53 std::vector<std::deque<bool>> fBoolEvtValues;
// Static TRegexp objects, defined elsewhere. From the names: patterns for integers,
// three forms of floating-point literal, and the two boolean spellings; presumably
// consulted by InferType - confirm.
55 static TRegexp intRegex, doubleRegex1, doubleRegex2, doubleRegex3, trueRegex, falseRegex;
// Header handling. FillHeaders takes a string (presumably the header line of the file).
57 void FillHeaders(
const std::string &);
// Fills the Record_t passed by non-const reference from the given string (presumably
// one line of the file) - confirm.
58 void FillRecord(
const std::string &, Record_t &);
// Takes a size_t; presumably the number of columns for which synthetic header names are
// produced when the file has no header row - TODO confirm.
59 void GenerateHeaders(
size_t);
// Returns untyped pointers for the column named by the string_view and the requested
// std::type_info. What each pointer refers to is defined by the implementation and by
// the RDataSource base class, neither of which is visible here.
60 std::vector<void *> GetColumnReadersImpl(std::string_view,
const std::type_info &);
// Type inference helpers. InferColTypes takes a mutable vector of strings; InferType
// takes one string and an unsigned index (presumably a cell value and its column index)
// - confirm.
61 void InferColTypes(std::vector<std::string> &);
62 void InferType(
const std::string &,
unsigned int);
// Splits a string into a vector of strings (presumably one line into its column cells).
63 std::vector<std::string> ParseColumns(
const std::string &);
// Takes the input string, an output vector of strings and a size_t, and returns a
// size_t. NOTE(review): looks like "parse one value starting at this offset, append it,
// return the next offset" - confirm against the implementation.
64 size_t ParseValue(
const std::string &, std::vector<std::string> &,
size_t);
// Const lookup of the type tag for a column name.
65 ColType_t GetType(std::string_view colName)
const;
// Returns a std::string; the content is not visible from this declaration.
68 std::string AsString();
// Constructor. Defaults: readHeaders = true, delimiter = ',', linesChunkSize = -1LL.
71 RCsvDS(std::string_view fileName,
bool readHeaders =
true,
char delimiter =
',', Long64_t linesChunkSize = -1LL);
// Const accessor returning a reference to a vector of column-name strings.
75 const std::vector<std::string> &GetColumnNames()
const;
// Returns a vector of (ULong64_t, ULong64_t) pairs; presumably begin/end entry indices
// of the ranges to process next - confirm whether the end is inclusive.
76 std::vector<std::pair<ULong64_t, ULong64_t>> GetEntryRanges();
// Const queries by column name: a type name as a string, and whether the column exists.
77 std::string GetTypeName(std::string_view colName)
const;
78 bool HasColumn(std::string_view colName)
const;
// Selects `entry` for `slot` and returns a bool; the meaning of the return value is not
// visible here - TODO confirm.
79 bool SetEntry(
unsigned int slot, ULong64_t entry);
// Receives the number of slots; presumably stored in fNSlots - confirm.
80 void SetNSlots(
unsigned int nSlots);
// Returns a std::string label; the content is not visible from this declaration.
81 std::string GetLabel();
// Declaration of a function returning an RDataFrame for the CSV file `fileName`.
// Defaults: readHeaders = true, delimiter = ',', linesChunkSize = -1LL - the same
// parameter list and defaults as the RCsvDS constructor declared earlier in this listing.
// NOTE(review): presumably a convenience factory that builds the frame on top of an
// RCsvDS; the definition is not visible here, and the meaning of a negative
// linesChunkSize should be confirmed there.
90 RDataFrame MakeCsvDataFrame(std::string_view fileName,
bool readHeaders =
true,
char delimiter =
',',
91 Long64_t linesChunkSize = -1LL);