Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
RDFUtils.cxx
Go to the documentation of this file.
1 // Author: Enrico Guiraud, Danilo Piparo CERN 03/2017
2 
3 /*************************************************************************
4  * Copyright (C) 1995-2018, Rene Brun and Fons Rademakers. *
5  * All rights reserved. *
6  * *
7  * For the licensing terms see $ROOTSYS/LICENSE. *
8  * For the list of contributors see $ROOTSYS/README/CREDITS. *
9  *************************************************************************/
10 
11 #include "RConfigure.h" // R__USE_IMT
12 #include "ROOT/RDataSource.hxx"
14 #include "RtypesCore.h"
15 #include "TBranch.h"
16 #include "TBranchElement.h"
17 #include "TClass.h"
18 #include "TClassEdit.h"
19 #include "TClassRef.h"
20 #include "TInterpreter.h"
21 #include "TLeaf.h"
22 #include "TObjArray.h"
23 #include "TROOT.h" // IsImplicitMTEnabled, GetImplicitMTPoolSize
24 #include "TTree.h"
25 
26 #include <stdexcept>
27 #include <string>
28 #include <typeinfo>
29 
30 using namespace ROOT::Detail::RDF;
31 using namespace ROOT::RDF;
32 
33 namespace ROOT {
34 namespace Internal {
35 namespace RDF {
36 
37 /// Return the type_info associated to a name. If the association fails, an
38 /// exception is thrown.
39 /// References and pointers are not supported since those cannot be stored in
40 /// columns.
41 const std::type_info &TypeName2TypeID(const std::string &name)
42 {
43  if (auto c = TClass::GetClass(name.c_str())) {
44  return *c->GetTypeInfo();
45  } else if (name == "char" || name == "Char_t")
46  return typeid(char);
47  else if (name == "unsigned char" || name == "UChar_t")
48  return typeid(unsigned char);
49  else if (name == "int" || name == "Int_t")
50  return typeid(int);
51  else if (name == "unsigned int" || name == "UInt_t")
52  return typeid(unsigned int);
53  else if (name == "short" || name == "Short_t")
54  return typeid(short);
55  else if (name == "unsigned short" || name == "UShort_t")
56  return typeid(unsigned short);
57  else if (name == "long" || name == "Long_t")
58  return typeid(long);
59  else if (name == "unsigned long" || name == "ULong_t")
60  return typeid(unsigned long);
61  else if (name == "double" || name == "Double_t")
62  return typeid(double);
63  else if (name == "float" || name == "Float_t")
64  return typeid(float);
65  else if (name == "long long" || name == "long long int" || name == "Long64_t")
66  return typeid(Long64_t);
67  else if (name == "unsigned long long" || name == "unsigned long long int" || name == "ULong64_t")
68  return typeid(ULong64_t);
69  else if (name == "bool" || name == "Bool_t")
70  return typeid(bool);
71  else {
72  std::string msg("Cannot extract type_info of type ");
73  msg += name.c_str();
74  msg += ".";
75  throw std::runtime_error(msg);
76  }
77 }
78 
79 /// Returns the name of a type starting from its type_info
80 /// An empty string is returned in case of failure
81 /// References and pointers are not supported since those cannot be stored in
82 /// columns.
83 std::string TypeID2TypeName(const std::type_info &id)
84 {
85  if (auto c = TClass::GetClass(id)) {
86  return c->GetName();
87  } else if (id == typeid(char))
88  return "char";
89  else if (id == typeid(unsigned char))
90  return "unsigned char";
91  else if (id == typeid(int))
92  return "int";
93  else if (id == typeid(unsigned int))
94  return "unsigned int";
95  else if (id == typeid(short))
96  return "short";
97  else if (id == typeid(unsigned short))
98  return "unsigned short";
99  else if (id == typeid(long))
100  return "long";
101  else if (id == typeid(unsigned long))
102  return "unsigned long";
103  else if (id == typeid(double))
104  return "double";
105  else if (id == typeid(float))
106  return "float";
107  else if (id == typeid(Long64_t))
108  return "Long64_t";
109  else if (id == typeid(ULong64_t))
110  return "ULong64_t";
111  else if (id == typeid(bool))
112  return "bool";
113  else
114  return "";
115 }
116 
/// Return the name of the RVec instantiation holding elements of type valueType,
/// i.e. the string "ROOT::VecOps::RVec<valueType>".
std::string ComposeRVecTypeName(const std::string &valueType)
{
   std::string rvecType("ROOT::VecOps::RVec<");
   rvecType += valueType;
   rvecType += '>';
   return rvecType;
}
121 
122 std::string GetLeafTypeName(TLeaf *leaf, const std::string &colName)
123 {
124  std::string colType = leaf->GetTypeName();
125  if (colType.empty())
126  throw std::runtime_error("Could not deduce type of leaf " + colName);
127  if (leaf->GetLeafCount() != nullptr && leaf->GetLenStatic() == 1) {
128  // this is a variable-sized array
129  colType = ComposeRVecTypeName(colType);
130  } else if (leaf->GetLeafCount() == nullptr && leaf->GetLenStatic() > 1) {
131  // this is a fixed-sized array (we do not differentiate between variable- and fixed-sized arrays)
132  colType = ComposeRVecTypeName(colType);
133  } else if (leaf->GetLeafCount() != nullptr && leaf->GetLenStatic() > 1) {
134  // we do not know how to deal with this branch
135  throw std::runtime_error("TTree leaf " + colName +
136  " has both a leaf count and a static length. This is not supported.");
137  }
138 
139  return colType;
140 }
141 
142 /// Return the typename of object colName stored in t, if any. Return an empty string if colName is not in t.
143 /// Supported cases:
144 /// - leaves corresponding to single values, variable- and fixed-length arrays, with following syntax:
145 /// - "leafname", as long as TTree::GetLeaf resolves it
146 /// - "b1.b2...leafname", as long as TTree::GetLeaf("b1.b2....", "leafname") resolves it
147 /// - TBranchElements, as long as TTree::GetBranch resolves their names
148 std::string GetBranchOrLeafTypeName(TTree &t, const std::string &colName)
149 {
150  // look for TLeaf either with GetLeaf(colName) or with GetLeaf(branchName, leafName) (splitting on last dot)
151  auto leaf = t.GetLeaf(colName.c_str());
152  if (!leaf) {
153  const auto dotPos = colName.find_last_of('.');
154  const auto hasDot = dotPos != std::string::npos;
155  if (hasDot) {
156  const auto branchName = colName.substr(0, dotPos);
157  const auto leafName = colName.substr(dotPos + 1);
158  leaf = t.GetLeaf(branchName.c_str(), leafName.c_str());
159  }
160  }
161  if (leaf)
162  return GetLeafTypeName(leaf, colName);
163 
164  // we could not find a leaf named colName, so we look for a TBranchElement
165  auto branch = t.GetBranch(colName.c_str());
166  if (branch) {
167  static const TClassRef tbranchelement("TBranchElement");
168  if (branch->InheritsFrom(tbranchelement)) {
169  auto be = static_cast<TBranchElement *>(branch);
170  if (auto currentClass = be->GetCurrentClass())
171  return currentClass->GetName();
172  else {
173  // Here we have a special case for getting right the type of data members
174  // of classes sorted in TClonesArrays: ROOT-9674
175  auto mother = be->GetMother();
176  if (mother && mother->InheritsFrom(tbranchelement)) {
177  auto beMom = static_cast<TBranchElement *>(mother);
178  auto beMomClass = beMom->GetClass();
179  if (beMomClass && 0 == strcmp("TClonesArray", beMomClass->GetName()))
180  return be->GetTypeName();
181  }
182  return be->GetClassName();
183  }
184  }
185  }
186 
187  // colName is not a leaf nor a TBranchElement
188  return std::string();
189 }
190 
191 /// Return a string containing the type of the given branch. Works both with real TTree branches and with temporary
192 /// column created by Define. Throws if type name deduction fails.
193 /// Note that for fixed- or variable-sized c-style arrays the returned type name will be RVec<T>.
194 /// vector2rvec specifies whether typename 'std::vector<T>' should be converted to 'RVec<T>' or returned as is
195 /// customColID is only used if isCustomColumn is true, and must correspond to the custom column's unique identifier
196 /// returned by its `GetID()` method.
197 std::string ColumnName2ColumnTypeName(const std::string &colName, unsigned int namespaceID, TTree *tree,
198  RDataSource *ds, bool isCustomColumn, bool vector2rvec, unsigned int customColID)
199 {
200  std::string colType;
201 
202  if (ds && ds->HasColumn(colName))
203  colType = ds->GetTypeName(colName);
204 
205  if (colType.empty() && tree) {
206  colType = GetBranchOrLeafTypeName(*tree, colName);
207  if (vector2rvec && TClassEdit::IsSTLCont(colType) == ROOT::ESTLType::kSTLvector) {
208  std::vector<std::string> split;
209  int dummy;
210  TClassEdit::GetSplit(colType.c_str(), split, dummy);
211  auto &valueType = split[1];
212  colType = ComposeRVecTypeName(valueType);
213  }
214  }
215 
216  if (colType.empty() && isCustomColumn) {
217  // this must be a temporary branch, we know there is an alias for its type
218  colType = "__rdf" + std::to_string(namespaceID) + "::" + colName + std::to_string(customColID) + "_type";
219  }
220 
221  if (colType.empty())
222  throw std::runtime_error("Column \"" + colName +
223  "\" is not in a dataset and is not a custom column been defined.");
224 
225  return colType;
226 }
227 
/// Convert a type name (e.g. "Float_t") to the corresponding ROOT type code (e.g. 'F') -- see TBranch documentation.
/// Both the ROOT typedef names and the usual C++ spellings of the fundamental types are recognised.
/// "long long"/"long long int" and their unsigned counterparts are accepted as spellings of
/// Long64_t/ULong64_t, consistently with TypeName2TypeID.
/// "long"/"long int" (and the unsigned variants) map to the same codes as Long64_t/ULong64_t.
/// \param[in] b The type name.
/// \return The one-character type code, or a space ' ' in case no match was found.
char TypeName2ROOTTypeName(const std::string &b)
{
   if (b == "Char_t" || b == "char")
      return 'B';
   if (b == "UChar_t" || b == "unsigned char")
      return 'b';
   if (b == "Short_t" || b == "short" || b == "short int")
      return 'S';
   if (b == "UShort_t" || b == "unsigned short" || b == "unsigned short int")
      return 's';
   if (b == "Int_t" || b == "int")
      return 'I';
   if (b == "UInt_t" || b == "unsigned" || b == "unsigned int")
      return 'i';
   if (b == "Float_t" || b == "float")
      return 'F';
   if (b == "Double_t" || b == "double")
      return 'D';
   if (b == "Long64_t" || b == "long long" || b == "long long int" || b == "long" || b == "long int")
      return 'L';
   if (b == "ULong64_t" || b == "unsigned long long" || b == "unsigned long long int" || b == "unsigned long" ||
       b == "unsigned long int")
      return 'l';
   if (b == "Bool_t" || b == "bool")
      return 'O';
   return ' ';
}
256 
/// Return the number of processing slots: the implicit multi-threading pool size if ROOT was built with
/// R__USE_IMT and implicit multi-threading is enabled, 1 otherwise.
unsigned int GetNSlots()
{
#ifdef R__USE_IMT
   if (ROOT::IsImplicitMTEnabled())
      return ROOT::GetImplicitMTPoolSize();
#endif // R__USE_IMT
   return 1u;
}
266 
267 /// Replace occurrences of '.' with '_' in each string passed as argument.
268 /// An Info message is printed when this happens. Dots at the end of the string are not replaced.
269 /// An exception is thrown in case the resulting set of strings would contain duplicates.
270 std::vector<std::string> ReplaceDotWithUnderscore(const std::vector<std::string> &columnNames)
271 {
272  auto newColNames = columnNames;
273  for (auto &col : newColNames) {
274  const auto dotPos = col.find('.');
275  if (dotPos != std::string::npos && dotPos != col.size() - 1 && dotPos != 0u) {
276  auto oldName = col;
277  std::replace(col.begin(), col.end(), '.', '_');
278  if (std::find(columnNames.begin(), columnNames.end(), col) != columnNames.end())
279  throw std::runtime_error("Column " + oldName + " would be written as " + col +
280  " but this column already exists. Please use Alias to select a new name for " +
281  oldName);
282  Info("Snapshot", "Column %s will be saved as %s", oldName.c_str(), col.c_str());
283  }
284  }
285 
286  return newColNames;
287 }
288 
289 void InterpreterDeclare(const std::string &code)
290 {
291  if (!gInterpreter->Declare(code.c_str())) {
292  const auto msg = "\nAn error occurred while jitting. The lines above might indicate the cause of the crash\n";
293  throw std::runtime_error(msg);
294  }
295 }
296 
297 Long64_t InterpreterCalc(const std::string &code, const std::string &context)
298 {
299  TInterpreter::EErrorCode errorCode(TInterpreter::kNoError);
300  auto res = gInterpreter->Calc(code.c_str(), &errorCode);
301  if (errorCode != TInterpreter::EErrorCode::kNoError) {
302  std::string msg = "\nAn error occurred while jitting";
303  if (!context.empty())
304  msg += " in " + context;
305  msg += ". The lines above might indicate the cause of the crash\n";
306  throw std::runtime_error(msg);
307  }
308  return res;
309 }
310 
311 } // end NS RDF
312 } // end NS Internal
313 } // end NS ROOT