Logo ROOT   6.30.04
Reference Guide
 All Namespaces Files Pages
CpuTensor.h
Go to the documentation of this file.
1 // @(#)root/tmva/tmva/dnn:$Id$
2 // Authors: Sitong An, Lorenzo Moneta 10/2019
3 
4 /*************************************************************************
5  * Copyright (C) 2019, ROOT *
6  * All rights reserved. *
7  * *
8  * For the licensing terms see $ROOTSYS/LICENSE. *
9  * For the list of contributors see $ROOTSYS/README/CREDITS. *
10  *************************************************************************/
11 
12 //////////////////////////////////////////////////////////
13 // Definition of the CpuTensor class used to represent //
14 // tensor data in deep neural nets (CNN, RNN, etc..) //
15 //////////////////////////////////////////////////////////
16 
17 #ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUTENSOR
18 #define TMVA_DNN_ARCHITECTURES_CPU_CPUTENSOR
19 
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include "TMatrix.h"
#include "TMVA/Config.h"
#include "CpuBuffer.h"
#include "CpuMatrix.h"
#include <TMVA/Config.h>
#include <TMVA/RTensor.hxx>
29 
30 namespace TMVA {
31 namespace DNN {
32 
33 // CPU Tensor Class
34 // It is a simple wrapper for TMVA RTensor based on
35 // memory owned by CPU Buffer
36 // We need to keep a pointer for CPUBuffer for fast conversion
37 // without copying to TCpuMatrix
38 // also provides compatibility with old interface
39 
40 template <typename AFloat>
41 class TCpuTensor : public TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>> {
42 
43 private:
44  //TCpuTensor will have no extra private members than RTensor
45 public:
46  friend class TCpuMatrix<AFloat>;
47 
48  using Shape_t = typename TMVA::Experimental::RTensor<AFloat>::Shape_t;
49  using MemoryLayout = TMVA::Experimental::MemoryLayout;
50  using Matrix_t = TCpuMatrix<AFloat>;
51  using Scalar_t = AFloat;
52 
53  // default constructor
54  TCpuTensor(): TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(0), {0})
55  {}
56 
57  /** constructors from n m */
58  TCpuTensor(size_t n, size_t m, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
59  : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(n * m), {n, m}, memlayout)
60  {}
61 
62  /** constructors from batch size, depth, height*width */
63  TCpuTensor(size_t bsize, size_t depth, size_t hw, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
64  : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(bsize * depth * hw), {depth, hw, bsize}, memlayout)
65  {
66  if (memlayout == MemoryLayout::RowMajor)
67  this->ReshapeInplace({bsize, depth, hw});
68  }
69 
70  /** constructors from batch size, depth, height, width */
71  TCpuTensor(size_t bsize, size_t depth, size_t height, size_t width,
72  MemoryLayout memlayout = MemoryLayout::ColumnMajor)
73  : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(bsize * depth * height * width),
74  {depth, height, width, bsize}, memlayout)
75  {
76  if (memlayout == MemoryLayout::RowMajor)
77  this->ReshapeInplace({bsize, depth, height, width});
78  }
79 
80  /** constructors from a shape.*/
81  TCpuTensor(Shape_t shape, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
82  : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(TMVA::Experimental::Internal::GetSizeFromShape(shape)),
83  shape, memlayout)
84  {}
85 
86  /* constructors from a AFloat pointer and a shape. This is a copy */
87 
88  TCpuTensor(AFloat *data, const Shape_t &shape,
89  MemoryLayout memlayout = MemoryLayout::ColumnMajor)
90  : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(TMVA::Experimental::Internal::GetSizeFromShape(shape)), shape, memlayout)
91  {
92  auto& container = *(this->GetContainer());
93  for (size_t i = 0; i < this->GetSize(); ++i) container[i] = data[i];
94  }
95 
96 
97 
98  /** constructors from a TCpuBuffer and a shape */
99  //unsafe method for backwards compatibility, const not promised. A view.
100  TCpuTensor(const TCpuBuffer<AFloat>& buffer, Shape_t shape, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
101  : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(buffer), shape, memlayout) {
102  R__ASSERT(this->GetSize() <= this->GetContainer()->GetSize());
103  }
104 
105 
106 
107  /** constructors from a TCpuMatrix. Memory layout is forced to be same as matrix (i.e. columnlayout) */
108  //unsafe method for backwards compatibility, const not promised. A view of underlying data.
109  TCpuTensor(const TCpuMatrix<AFloat> &matrix, size_t dim = 3, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
110  : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(matrix.GetBuffer()),{matrix.GetNrows(), matrix.GetNcols()}, memlayout)
111  {
112 
113  if (dim > 2) {
114  Shape_t shape = this->GetShape();
115 
116  if (this->GetLayout() == MemoryLayout::ColumnMajor) {
117  shape.insert(shape.end(),dim-2, 1);
118  } else {
119  shape.insert(shape.begin(), dim - 2, 1);
120  }
121  this->ReshapeInplace(shape);
122  }
123  }
124 
125 
126  /** Convert to a TMatrixT<AFloat_t> object. Performs a deep copy of the matrix
127  * elements. */
128 
129  operator TMatrixT<AFloat>() const {
130  // this should work only for size 2 or 4 tensors
131  if (this->GetShape().size() == 2 || (this->GetShape().size() == 3 && GetFirstSize() == 1)) {
132  TCpuMatrix<AFloat> temp = GetMatrix();
133  return temp;
134  }
135  // convert as a flat vector
136  return TMatrixT<AFloat>(1, this->GetSize(), this->GetData());
137  }
138 
139 
140  /** Return raw pointer to the elements stored contiguously in column-major
141  * order. */
142  AFloat *GetRawDataPointer() { return *(this->GetContainer()); }
143  const AFloat *GetRawDataPointer() const { return *(this->GetContainer()); }
144 
145  // for same API as CudaTensor (device buffer is the CpuBuffer)
146  const TCpuBuffer<AFloat> & GetDeviceBuffer() const {return *(this->GetContainer());}
147  TCpuBuffer<AFloat> & GetDeviceBuffer() {return *(this->GetContainer());}
148 
149 
150  size_t GetNoElements() const { return this->GetSize(); }
151 
152  // return the size of the first dimension (if in row order) or last dimension if in column order
153  // Tensor is F x H x W x...for row order layout FHWC
154  // or H x W x ... x F for column order layout CHWF
155  // logic copied from TCudaTensor
156  size_t GetFirstSize() const
157  {
158  auto& shape = this->GetShape();
159  return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape.back() : shape.front();
160  }
161 
162  size_t GetCSize() const
163  {
164  auto& shape = this->GetShape();
165  if (shape.size() == 2) return 1;
166  return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape.front() : shape[1]; // assume NHWC
167  }
168  //
169  size_t GetHSize() const
170  {
171  auto& shape = this->GetShape();
172  if (shape.size() == 2) return shape[0];
173  if (shape.size() == 3) return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape[0] : shape[1] ;
174  if (shape.size() >= 4) return shape[2] ;
175  return 0;
176 
177  }
178  size_t GetWSize() const
179  {
180  auto& shape = this->GetShape();
181  if (shape.size() == 2) return shape[1];
182  if (shape.size() == 3) return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape[1] : shape[2] ;
183  if (shape.size() >= 4) return shape[3] ;
184  return 0;
185 
186  }
187 
188  // for backward compatibility (assume column-major
189  // for backward compatibility : for CM tensor (n1,n2,n3,n4) -> ( n1*n2*n3, n4)
190  // for RM tensor (n1,n2,n3,n4) -> ( n2*n3*n4, n1 ) ???
191  size_t GetNrows() const { return (GetLayout() == MemoryLayout::ColumnMajor ) ? this->GetStrides().back() : this->GetShape().front();}
192  size_t GetNcols() const { return (GetLayout() == MemoryLayout::ColumnMajor ) ? this->GetShape().back() : this->GetStrides().front(); }
193 
194 
195  MemoryLayout GetLayout() const { return this->GetMemoryLayout(); }
196 
197  //this will be an unsafe view. Method exists for backwards compatibility only
198  TCpuMatrix<AFloat> GetMatrix() const
199  {
200  size_t ndims = 0;
201  auto& shape = this->GetShape();
202  //check if squeezable but do not actually squeeze
203  for (auto& shape_i : shape){
204  if (shape_i != 1) {
205  ndims++;
206  }
207  }
208  assert(ndims <= 2 && shape.size() > 1); // to support shape cases {n,1}
209  return TCpuMatrix<AFloat>(*(this->GetContainer()), GetHSize(), GetWSize());
210  }
211 
212  // Create copy, replace and return
213  TCpuTensor<AFloat> Reshape(Shape_t shape) const
214  {
215  TCpuTensor<AFloat> x(*this);
216  x.ReshapeInplace(shape);
217  return x;
218  }
219 
220  // return a view of slices in the first dimension (if row wise) or last dimension if colun wise
221  // so single event slices
222  TCpuTensor<AFloat> At(size_t i)
223  {
224  auto &shape = this->GetShape();
225  auto layout = this->GetMemoryLayout();
226  Shape_t sliced_shape = (layout == MemoryLayout::RowMajor) ? Shape_t(shape.begin() + 1, shape.end())
227  : Shape_t(shape.begin(), shape.end() - 1);
228 
229  size_t buffsize = (layout == MemoryLayout::RowMajor) ? this->GetStrides().front() : this->GetStrides().back();
230  size_t offset = i * buffsize;
231 
232  return TCpuTensor<AFloat>(this->GetContainer()->GetSubBuffer(offset, buffsize), sliced_shape, layout);
233  }
234 
235  TCpuTensor<AFloat> At(size_t i) const { return (const_cast<TCpuTensor<AFloat> &>(*this)).At(i); }
236 
237  // set all the tensor contents to zero
238  void Zero()
239  {
240  AFloat *data = *(this->GetContainer());
241  for (size_t i = 0; i < this->GetSize(); ++i)
242  data[i] = 0;
243  }
244 
245  // access single element - assume tensor dim is 2
246  AFloat &operator()(size_t i, size_t j)
247  {
248  auto &shape = this->GetShape();
249  assert(shape.size() == 2);
250  return (this->GetMemoryLayout() == MemoryLayout::RowMajor) ? (*(this->GetContainer()))[i * shape[1] + j]
251  : (*(this->GetContainer()))[j * shape[0] + i];
252  }
253 
254  // access single element - assume tensor dim is 3. First index i is always the major indipendent of row-major or
255  // column major row- major I - J - K . Column- major is J - K - I
256  AFloat &operator()(size_t i, size_t j, size_t k)
257  {
258  auto &shape = this->GetShape();
259  assert(shape.size() == 3);
260 
261  return (this->GetMemoryLayout() == MemoryLayout::RowMajor)
262  ? (*(this->GetContainer()))[i * shape[1] * shape[2] + j * shape[2] + k]
263  : (*(this->GetContainer()))[i * shape[0] * shape[1] + k * shape[0] + j]; // note that is J-K-I
264  }
265 
266  // access single element - assume tensor dim is 2
267  AFloat operator()(size_t i, size_t j) const
268  {
269  auto &shape = this->GetShape();
270  assert(shape.size() == 2);
271  return (this->GetMemoryLayout() == MemoryLayout::RowMajor) ? (this->GetData())[i * shape[1] + j]
272  : (this->GetData())[j * shape[0] + i];
273  }
274 
275  AFloat operator()(size_t i, size_t j, size_t k) const
276  {
277  auto &shape = this->GetShape();
278  assert(shape.size() == 3);
279 
280  return (this->GetMemoryLayout() == MemoryLayout::RowMajor)
281  ? (this->GetData())[i * shape[1] * shape[2] + j * shape[2] + k]
282  : (this->GetData())[i * shape[0] * shape[1] + k * shape[0] + j]; // note that is J-K-I
283  }
284 
285  /** Map the given function over the matrix elements. Executed in parallel
286  * using TThreadExecutor. */
287  template <typename Function_t>
288  void Map(Function_t & f);
289 
290  /** Same as maps but takes the input values from the tensor \p A and writes
291  * the results in this tensor. */
292  template <typename Function_t>
293  void MapFrom(Function_t & f, const TCpuTensor<AFloat> &A);
294 
295  size_t GetBufferUseCount() const { return this->GetContainer()->GetUseCount(); }
296 
297  void Print(const char *name = "Tensor") const
298  {
299  PrintShape(name);
300 
301  for (size_t i = 0; i < this->GetSize(); i++)
302  std::cout << (this->GetData())[i] << " ";
303  std::cout << std::endl;
304  }
305  void PrintShape(const char *name = "Tensor") const
306  {
307  std::string memlayout = (GetLayout() == MemoryLayout::RowMajor) ? "RowMajor" : "ColMajor";
308  std::cout << name << " shape : { ";
309  auto &shape = this->GetShape();
310  for (size_t i = 0; i < shape.size() - 1; ++i)
311  std::cout << shape[i] << " , ";
312  std::cout << shape.back() << " } "
313  << " Layout : " << memlayout << std::endl;
314  }
315 };
316 
317 //______________________________________________________________________________
318 template <typename AFloat>
319 template <typename Function_t>
320 inline void TCpuTensor<AFloat>::Map(Function_t &f)
321 {
322  AFloat *data = GetRawDataPointer();
323  size_t nelements = GetNoElements();
324  size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
325 
326  auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID) {
327  size_t jMax = std::min(workerID + nsteps, nelements);
328  for (size_t j = workerID; j < jMax; ++j) {
329  data[j] = f(data[j]);
330  }
331  return 0;
332  };
333 
334  if (nsteps < nelements) {
335  TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
336 
337  // for (size_t i = 0; i < nelements; i+=nsteps)
338  // ff(i);
339 
340  } else {
341  R__ASSERT(nelements == nsteps);
342  ff(0);
343  }
344 }
345 
346 //______________________________________________________________________________
347 template <typename AFloat>
348 template <typename Function_t>
349 inline void TCpuTensor<AFloat>::MapFrom(Function_t &f, const TCpuTensor<AFloat> &A)
350 {
351  AFloat *dataB = GetRawDataPointer();
352  const AFloat *dataA = A.GetRawDataPointer();
353 
354  size_t nelements = GetNoElements();
355  R__ASSERT(nelements == A.GetNoElements());
356  size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
357 
358  auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID) {
359  size_t jMax = std::min(workerID + nsteps, nelements);
360  for (size_t j = workerID; j < jMax; ++j) {
361  dataB[j] = f(dataA[j]);
362  }
363  return 0;
364  };
365  if (nsteps < nelements) {
366  TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
367  // for (size_t i = 0; i < nelements; i+=nsteps)
368  // ff(i);
369 
370  } else {
371  R__ASSERT(nelements == nsteps);
372  ff(0);
373  }
374 }
375 
376 
377 } // namespace DNN
378 } // namespace TMVA
379 
380 #endif