Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
df007_snapshot.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_dataframe
3 /// \notebook -draw
4 /// This tutorial shows how to write out datasets in ROOT format using the RDataFrame
5 /// \macro_code
6 ///
7 /// \date April 2017
8 /// \author Danilo Piparo
9 
10 // A simple helper function to fill a test tree: this makes the example
11 // stand-alone.
12 void fill_tree(const char *treeName, const char *fileName)
13 {
14  ROOT::RDataFrame d(10000);
15  int i(0);
16  d.Define("b1", [&i]() { return i; })
17  .Define("b2",
18  [&i]() {
19  float j = i * i;
20  ++i;
21  return j;
22  })
23  .Snapshot(treeName, fileName);
24 }
25 
26 int df007_snapshot()
27 {
28  // We prepare an input tree to run on
29  auto fileName = "df007_snapshot.root";
30  auto outFileName = "df007_snapshot_output.root";
31  auto outFileNameAllColumns = "df007_snapshot_output_allColumns.root";
32  auto treeName = "myTree";
33  fill_tree(treeName, fileName);
34 
35  // We read the tree from the file and create a RDataFrame.
36  ROOT::RDataFrame d(treeName, fileName);
37 
38  // ## Select entries
39  // We now select some entries in the dataset
40  auto d_cut = d.Filter("b1 % 2 == 0");
41  // ## Enrich the dataset
42  // Build some temporary columns: we'll write them out
43  auto d2 = d_cut.Define("b1_square", "b1 * b1")
44  .Define("b2_vector",
45  [](float b2) {
46  std::vector<float> v;
47  for (int i = 0; i < 3; i++)
48  v.push_back(b2 * i);
49  return v;
50  },
51  {"b2"});
52 
53  // ## Write it to disk in ROOT format
54  // We now write to disk a new dataset with one of the variables originally
55  // present in the tree and the new variables.
56  // The user can explicitly specify the types of the columns as template
57  // arguments of the Snapshot method, otherwise they will be automatically
58  // inferred.
59  d2.Snapshot(treeName, outFileName, {"b1", "b1_square", "b2_vector"});
60 
61  // Open the new file and list the columns of the tree
62  TFile f1(outFileName);
63  auto t = f1.Get<TTree>(treeName);
64  std::cout << "These are the columns b1, b1_square and b2_vector:" << std::endl;
65  for (auto branch : *t->GetListOfBranches()) {
66  std::cout << "Branch: " << branch->GetName() << std::endl;
67  }
68  f1.Close();
69 
70  // We are not forced to write the full set of column names. We can also
71  // specify a regular expression for that. In case nothing is specified, all
72  // columns are persistified.
73  d2.Snapshot(treeName, outFileNameAllColumns);
74 
75  // Open the new file and list the columns of the tree
76  TFile f2(outFileNameAllColumns);
77  t = f2.Get<TTree>(treeName);
78  std::cout << "These are all the columns available to this tdf:" << std::endl;
79  for (auto branch : *t->GetListOfBranches()) {
80  std::cout << "Branch: " << branch->GetName() << std::endl;
81  }
82  f2.Close();
83 
84  // We can also get a fresh RDataFrame out of the snapshot and restart the
85  // analysis chain from it. The default columns are the one selected.
86  // Notice also how we can decide to be more explicit with the types of the
87  // columns.
88  auto snapshot_tdf = d2.Snapshot<int>(treeName, outFileName, {"b1_square"});
89  auto h = snapshot_tdf->Histo1D();
90  auto c = new TCanvas();
91  h->DrawClone();
92 
93  return 0;
94 }