Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
df026_AsNumpyArrays.py
Go to the documentation of this file.
## \file
## \ingroup tutorial_dataframe
## \notebook
## This tutorial shows how to read data of an RDataFrame into Numpy arrays.
##
## \macro_code
## \macro_output
##
## \date December 2018
## \author Stefan Wunsch

import ROOT
from sys import exit

# Let's create a simple dataframe with ten rows and two columns.
df = (
    ROOT.RDataFrame(10)
    .Define("x", "(int)rdfentry_")
    .Define("y", "1.f/(1.f+rdfentry_)")
)

# Next, we want to access the data from Python as Numpy arrays. To do so, the
# content of the dataframe is converted using the AsNumpy method. The returned
# object is a dictionary with the column names as keys and 1D numpy arrays with
# the content as values.
npy = df.AsNumpy()
print("Read-out of the full RDataFrame:\n{}\n".format(npy))

# Since reading out data to memory is expensive, always try to read out only
# what is needed for your analysis. You can use all RDataFrame features to
# reduce your dataset, e.g., the Filter transformation. Furthermore, you can
# pass to the AsNumpy method a whitelist of column names with the option
# `columns` or a blacklist of column names with the option `exclude`.
df2 = df.Filter("x>5")
npy2 = df2.AsNumpy()
print("Read-out of the filtered RDataFrame:\n{}\n".format(npy2))

npy3 = df2.AsNumpy(columns=["x"])
print("Read-out of the filtered RDataFrame with the columns option:\n{}\n".format(npy3))

npy4 = df2.AsNumpy(exclude=["x"])
print("Read-out of the filtered RDataFrame with the exclude option:\n{}\n".format(npy4))

# You can read out all objects from ROOT files since these are wrapped by
# PyROOT in the Python world. However, be aware that objects of non-fundamental
# types (i.e., complex C++ objects rather than int or float) are costly to
# read out.
ROOT.gInterpreter.Declare("""
// Inject the C++ class CustomObject in the C++ runtime.
class CustomObject {
public:
 int x = 42;
};
// Create a function that returns such an object. This is called to fill the dataframe.
CustomObject fill_object() { return CustomObject(); }
""")

df3 = df.Define("custom_object", "fill_object()")
npy5 = df3.AsNumpy()
print("Read-out of C++ objects:\n{}\n".format(npy5["custom_object"]))
print("Access to all methods and data members of the C++ object:\nObject: {}\nAccess data member: custom_object.x = {}\n".format(
    repr(npy5["custom_object"][0]), npy5["custom_object"][0].x))

# Note that you can pass the object returned by AsNumpy directly to
# pandas.DataFrame, including any complex C++ object that may be read out.
try:
    import pandas
except ImportError:
    # pandas is an optional dependency of this tutorial: if it is missing,
    # report it and stop here. Calling exit() without an argument ends the
    # script with a success status, so a missing pandas is not an error.
    print("Failed to import pandas.")
    exit()

# NOTE: this rebinds `df`, which referred to the RDataFrame until now, to the
# newly created pandas.DataFrame.
df = pandas.DataFrame(npy5)
print("Content of the ROOT.RDataFrame as pandas.DataFrame:\n{}\n".format(df))