Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
ntpl004_dimuon.C
Go to the documentation of this file.
1 /// \file
2 /// \ingroup tutorial_ntuple
3 /// \notebook
4 /// Convert CMS open data from a TTree to RNTuple.
5 /// This tutorial illustrates data conversion and data processing with RNTuple and RDataFrame. In contrast to the
6 /// LHCb open data tutorial, the data model in this tutorial is not tabular but entries have variable lengths vectors
7 /// Based on RDataFrame's df102_NanoAODDimuonAnalysis.C
8 ///
9 /// \macro_image
10 /// \macro_code
11 ///
12 /// \date April 2019
13 /// \author The ROOT Team
14 
15 // NOTE: The RNTuple classes are experimental at this point.
16 // Functionality, interface, and data format is still subject to changes.
17 // Do not use for real data!
18 
19 #include <ROOT/RDataFrame.hxx>
20 #include <ROOT/RNTuple.hxx>
21 #include <ROOT/RNTupleDS.hxx>
22 #include <ROOT/RVec.hxx>
23 
24 #include <TCanvas.h>
25 #include <TH1D.h>
26 #include <TLatex.h>
27 #include <TStyle.h>
28 
29 #include <cassert>
30 #include <cmath>
31 #include <iostream>
32 #include <memory>
33 #include <string>
34 #include <vector>
35 #include <utility>
36 
37 // Import classes from experimental namespace for the time being
38 using RNTupleReader = ROOT::Experimental::RNTupleReader;
39 using RNTupleWriter = ROOT::Experimental::RNTupleWriter;
40 using RNTupleDS = ROOT::Experimental::RNTupleDS;
41 
42 constexpr char const* kTreeFileName = "http://root.cern.ch/files/NanoAOD_DoubleMuon_CMS2011OpenData.root";
43 constexpr char const* kNTupleFileName = "ntpl004_dimuon.root";
44 
45 
46 using ColNames_t = std::vector<std::string>;
47 
48 // This is a custom action for RDataFrame. It does not support parallelism!
49 // This action writes data from an RDataFrame entry into an ntuple. It is templated on the
50 // types of the columns to be written and can be used as a generic file format converter.
51 template <typename... ColumnTypes_t>
52 class RNTupleHelper : public ROOT::Detail::RDF::RActionImpl<RNTupleHelper<ColumnTypes_t...>> {
53 public:
54  using Result_t = RNTupleWriter;
55 private:
56  using ColumnValues_t = std::tuple<std::shared_ptr<ColumnTypes_t>...>;
57 
58  std::string fNTupleName;
59  std::string fRootFile;
60  ColNames_t fColNames;
61  ColumnValues_t fColumnValues;
62  static constexpr const auto fNColumns = std::tuple_size<ColumnValues_t>::value;
63  std::shared_ptr<RNTupleWriter> fNTuple;
64  int fCounter;
65 
66  template<std::size_t... S>
67  void InitializeImpl(std::index_sequence<S...>) {
68  auto eventModel = ROOT::Experimental::RNTupleModel::Create();
69  // Create the fields and the shared pointers to the connected values
70  std::initializer_list<int> expander{
71  (std::get<S>(fColumnValues) = eventModel->MakeField<ColumnTypes_t>(fColNames[S]), 0)...};
72  fNTuple = std::move(RNTupleWriter::Recreate(std::move(eventModel), fNTupleName, fRootFile));
73  }
74 
75  template<std::size_t... S>
76  void ExecImpl(std::index_sequence<S...>, ColumnTypes_t... values) {
77  // For every entry, set the destination of the ntuple's default entry's shared pointers to the given values,
78  // which are provided by RDataFrame
79  std::initializer_list<int> expander{(*std::get<S>(fColumnValues) = values, 0)...};
80  }
81 
82 public:
83  RNTupleHelper(std::string_view ntupleName, std::string_view rootFile, const ColNames_t& colNames)
84  : fNTupleName(ntupleName), fRootFile(rootFile), fColNames(colNames)
85  {
86  InitializeImpl(std::make_index_sequence<fNColumns>());
87  }
88 
89  RNTupleHelper(RNTupleHelper&&) = default;
90  RNTupleHelper(const RNTupleHelper&) = delete;
91  std::shared_ptr<RNTupleWriter> GetResultPtr() const { return fNTuple; }
92 
93  void Initialize()
94  {
95  fCounter = 0;
96  }
97 
98  void InitTask(TTreeReader *, unsigned int) {}
99 
100  /// This is a method executed at every entry
101  void Exec(unsigned int slot, ColumnTypes_t... values)
102  {
103  // Populate the ntuple's fields data locations with the provided values, then write to disk
104  ExecImpl(std::make_index_sequence<fNColumns>(), values...);
105  fNTuple->Fill();
106  if (++fCounter % 100000 == 0)
107  std::cout << "Wrote " << fCounter << " entries" << std::endl;
108  }
109 
110  void Finalize()
111  {
112  fNTuple->CommitCluster();
113  }
114 
115  std::string GetActionName() { return "RNTuple Writer"; }
116 };
117 
118 
119 /// A wrapper for ROOT's InvariantMass function that takes std::vector instead of RVecs
120 template <typename T>
121 T InvariantMassStdVector(std::vector<T>& pt, std::vector<T>& eta, std::vector<T>& phi, std::vector<T>& mass)
122 {
123  assert(pt.size() == 2 && eta.size() == 2 && phi.size() == 2 && mass.size() == 2);
124 
125  // We adopt the memory here, no copy
126  ROOT::RVec<float> rvPt(pt);
127  ROOT::RVec<float> rvEta(eta);
128  ROOT::RVec<float> rvPhi(phi);
129  ROOT::RVec<float> rvMass(mass);
130 
131  return InvariantMass(rvPt, rvEta, rvPhi, rvMass);
132 }
133 
134 // We use an RDataFrame custom snapshotter to convert between TTree and RNTuple.
135 // The snapshotter is templated; we construct the conversion C++ code as a string and hand it over to Cling
136 void Convert() {
137  // Use df to list the branch types and names of the input tree
138  ROOT::RDataFrame df("Events", kTreeFileName);
139 
140  // Construct the types for the template instantiation and the column names from the dataframe
141  std::string typeList = "<";
142  std::string columnList = "{";
143  auto columnNames = df.GetColumnNames();
144  for (auto name : columnNames) {
145  auto typeName = df.GetColumnType(name);
146  // Skip ULong64_t for the time being, RNTuple support will be added at a later point
147  if (typeName == "ULong64_t") continue;
148  columnList += "\"" + name + "\",";
149  typeList += typeName + ",";
150  }
151  *columnList.rbegin() = '}';
152  *typeList.rbegin() = '>';
153 
154  std::string code = "{";
155  // Convert the first 4 million events
156  code += "auto df = std::make_unique<ROOT::RDataFrame>(\"Events\", \"" + std::string(kTreeFileName)
157  + "\")->Range(0, 4000000);";
158  code += "ColNames_t colNames = " + columnList + ";";
159  code += "RNTupleHelper" + typeList + " helper{\"Events\", \"" + std::string(kNTupleFileName) + "\", colNames};";
160  code += "*df.Book" + typeList + "(std::move(helper), colNames);";
161  code += "}";
162 
163  gInterpreter->ProcessLine(code.c_str());
164 }
165 
166 
167 void ntpl004_dimuon() {
168  Convert();
169 
170  // Enable mutli-threading only after the conversion because we use RDF's Range() in it,
171  // which currently does not support multi-threading
172  ROOT::EnableImplicitMT();
173 
174  auto df = ROOT::Experimental::MakeNTupleDataFrame("Events", kNTupleFileName);
175 
176  // As of this point, the tutorial is identical to df102_NanoAODDimuonAnalysis except the use of
177  // InvariantMassStdVector instead of InvariantMass
178 
179  // For simplicity, select only events with exactly two muons and require opposite charge
180  auto df_2mu = df.Filter("nMuon == 2", "Events with exactly two muons");
181  auto df_os = df_2mu.Filter("Muon_charge[0] != Muon_charge[1]", "Muons with opposite charge");
182 
183  // Compute invariant mass of the dimuon system
184  auto df_mass = df_os.Define("Dimuon_mass", InvariantMassStdVector<float>, {"Muon_pt", "Muon_eta", "Muon_phi", "Muon_mass"});
185  auto df_size = df_os.Define("eta_size", "Muon_mass.size()");
186 
187  // Make histogram of dimuon mass spectrum
188  auto h = df_mass.Histo1D({"Dimuon_mass", "Dimuon_mass", 30000, 0.25, 300}, "Dimuon_mass");
189 
190  // Request cut-flow report
191  auto report = df_mass.Report();
192 
193  // Produce plot
194  gStyle->SetOptStat(0); gStyle->SetTextFont(42);
195  auto c = new TCanvas("c", "", 800, 700);
196  c->SetLogx(); c->SetLogy();
197 
198  h->SetTitle("");
199  h->GetXaxis()->SetTitle("m_{#mu#mu} (GeV)"); h->GetXaxis()->SetTitleSize(0.04);
200  h->GetYaxis()->SetTitle("N_{Events}"); h->GetYaxis()->SetTitleSize(0.04);
201  h->DrawCopy();
202 
203  TLatex label; label.SetNDC(true);
204  label.DrawLatex(0.175, 0.740, "#eta");
205  label.DrawLatex(0.205, 0.775, "#rho,#omega");
206  label.DrawLatex(0.270, 0.740, "#phi");
207  label.DrawLatex(0.400, 0.800, "J/#psi");
208  label.DrawLatex(0.415, 0.670, "#psi'");
209  label.DrawLatex(0.485, 0.700, "Y(1,2,3S)");
210  label.DrawLatex(0.755, 0.680, "Z");
211  label.SetTextSize(0.040); label.DrawLatex(0.100, 0.920, "#bf{CMS Open Data}");
212  label.SetTextSize(0.030); label.DrawLatex(0.630, 0.920, "#sqrt{s} = 8 TeV, L_{int} = 11.6 fb^{-1}");
213 
214  // Print cut-flow report
215  report->Print();
216 }