CpuMatrix.h
// @(#)root/tmva/tmva/dnn:$Id$
// Author: Simon Pfreundschuh 20/07/16

/*************************************************************************
 * Copyright (C) 2016, Simon Pfreundschuh                                *
 * All rights reserved.                                                  *
 *                                                                       *
 * For the licensing terms see $ROOTSYS/LICENSE.                         *
 * For the list of contributors see $ROOTSYS/README/CREDITS.             *
 *************************************************************************/

//////////////////////////////////////////////////////////
// Definition of the CpuMatrix class used to represent  //
// weight and bias matrices in neural nets.             //
//////////////////////////////////////////////////////////

#ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX
#define TMVA_DNN_ARCHITECTURES_CPU_CPUMATRIX

#ifdef R__USE_IMT
#define DL_USE_MTE // use MT with tbb
#endif

#include <algorithm> // std::min, used by Map/MapFrom below
#include <cstddef>
#include <vector>

#include "TMatrix.h"
#include "TMVA/Config.h"
#include "CpuBuffer.h"

// #define DEBUG_TMVA_TCPUMATRIX
#if defined(DEBUG_TMVA_TCPUMATRIX)
/*
 * Debug macro for printing matrices.
 *
 * Prints the matrix passed as `mat` together with its stringified name
 * (obtained via the preprocessor operator `#mat`). E.g.
 * `TMVA_DNN_PrintTCpuMatrix(matA, "Test")` prints "matA is null pointer"
 * if the data pointer is null, and otherwise the matrix dimensions and
 * elements.
 *
 * Note: This is a preprocessor macro. It does _not_ respect namespaces.
 *
 * @param mat Matrix to print
 * @param text Name of matrix
 */
#define TMVA_DNN_PrintTCpuMatrix(mat, text)                                                                \
   {                                                                                                       \
      auto _dpointer = mat.GetRawDataPointer();                                                            \
      if (_dpointer == NULL) {                                                                             \
         std::cout << #mat << " is null pointer" << std::endl;                                             \
         exit(1);                                                                                          \
      }                                                                                                    \
      auto _nrows = mat.GetNrows();                                                                        \
      auto _ncols = mat.GetNcols();                                                                        \
      std::cout << "---------------------" << text << " " << #mat << "(" << _nrows << "," << _ncols << ")" \
                << "--------------------" << std::endl;                                                    \
      for (size_t _i = 0; _i < _nrows; _i++) {                                                             \
         for (size_t _j = 0; _j < _ncols; _j++) {                                                          \
            std::cout << mat(_i, _j);                                                                      \
            if (_j < _ncols - 1)                                                                           \
               std::cout << ",";                                                                           \
         }                                                                                                 \
         std::cout << std::endl;                                                                           \
      }                                                                                                    \
   }
#else
#define TMVA_DNN_PrintTCpuMatrix(mat, text)
#endif
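
// A minimal usage sketch (illustrative only, not part of the header; the
// include path shown is indicative and may differ on your installation):
//
//    #define DEBUG_TMVA_TCPUMATRIX
//    #include "TMVA/DNN/Architectures/Cpu/CpuMatrix.h"
//
//    TMVA::DNN::TCpuMatrix<Double_t> w(2, 3);
//    TMVA_DNN_PrintTCpuMatrix(w, "weights"); // prints a "w(2,3)" header and the elements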

namespace TMVA {
namespace DNN {

/** The TCpuMatrix class.
 *
 * Matrix class for multi-threaded CPU architectures. Uses the TCpuBuffer
 * class to store the matrix elements in column-major format for compatibility
 * with BLAS. Provides Map and MapFrom member functions to simplify the
 * application of activation functions and their derivatives to matrices.
 *
 * Copying and assignment of TCpuMatrix objects only perform shallow copies,
 * i.e. copying is fast and the resulting objects share the element data.
 *
 * \tparam AFloat The floating point type used to represent the matrix elements.
 */
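//
// A minimal usage sketch (illustrative only; the matrix names are examples):
//
//    using TMVA::DNN::TCpuMatrix;
//
//    TCpuMatrix<Double_t> a(2, 3);   // allocate a 2x3 matrix
//    a(0, 1) = 1.5;                  // element access is (row, column)
//
//    TCpuMatrix<Double_t> b(a);      // shallow copy: b shares a's buffer
//    b(0, 1) = 2.5;                  // also visible through a(0, 1)
//
//    TMatrixT<Double_t> m = a;       // deep copy into a ROOT TMatrixT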
//______________________________________________________________________________
template <typename AFloat>
class TCpuMatrix {
private:
   static std::vector<AFloat> fOnes; ///< Vector filled with ones used for BLAS calls.

public:
   TCpuBuffer<AFloat> fBuffer; ///< The buffer holding the matrix elements
                               ///< in column-major format.
private:
   size_t fNCols;
   size_t fNRows;

public:
   // friend class TCpuTensor<AFloat>;

   TCpuBuffer<AFloat> &GetBuffer() { return fBuffer; }
   const TCpuBuffer<AFloat> &GetBuffer() const { return fBuffer; }

   /** Returns a pointer to a vector holding only ones, with a guaranteed length
    *  of the number of columns of every instantiated CpuMatrix object. */
   static const AFloat *GetOnePointer() { return fOnes.data(); }

   static size_t GetOnePointerSize() { return fOnes.size(); }

   static void InitializeOneVector(size_t n);

   TCpuMatrix() : fNCols(0), fNRows(0) {}

   /** Construct a matrix and allocate space for its elements. */
   TCpuMatrix(size_t nRows, size_t nCols);
   /** Construct a TCpuMatrix object by (deeply) copying the elements of a
    *  TMatrixT matrix. */
   TCpuMatrix(const TMatrixT<AFloat> &);
   /** Construct an m-times-n matrix from the given buffer. The buffer size
    *  must match the matrix dimensions. */
   TCpuMatrix(const TCpuBuffer<AFloat> &buffer, size_t m, size_t n);

   // N.B. the default copy constructor performs a shallow copy (NOT a deep one)!
   TCpuMatrix(const TCpuMatrix &) = default;
   TCpuMatrix(TCpuMatrix &&) = default;
   TCpuMatrix &operator=(const TCpuMatrix &) = default;
   TCpuMatrix &operator=(TCpuMatrix &&) = default;
   ~TCpuMatrix() = default;

   /** Clear the content of the matrix and set all elements to zero. */
   void Zero();

   /** Convert to a TMatrixT<AFloat> object. Performs a deep copy of the matrix
    *  elements. */
   operator TMatrixT<AFloat>() const;

   /** Map the given function over the matrix elements. Executed in parallel
    *  using TThreadExecutor. */
   template <typename Function_t>
   void Map(Function_t &f);

   /** Same as Map, but takes the input values from the matrix \p A and writes
    *  the results into this matrix. */
   template <typename Function_t>
   void MapFrom(Function_t &f, const TCpuMatrix &A);
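
   // A minimal sketch of Map / MapFrom (illustrative only; the lambdas and the
   // matrix names are examples, not part of the interface):
   //
   //    TCpuMatrix<Double_t> x(4, 4), y(4, 4);
   //    auto relu  = [](Double_t v) { return v > 0. ? v : 0.; };
   //    auto scale = [](Double_t v) { return 2. * v; };
   //
   //    x.Map(relu);         // x(i,j) <- relu(x(i,j)), applied in place
   //    y.MapFrom(scale, x); // y(i,j) <- scale(x(i,j)); x and y must have
   //                         // the same number of elements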

   size_t GetNrows() const { return fNRows; }
   size_t GetNcols() const { return fNCols; }
   size_t GetNoElements() const { return fNRows * fNCols; }
   size_t GetSize() const { return fNRows * fNCols; }

   /** Return the matrix element in row \p i and column \p j. */
   AFloat operator()(size_t i, size_t j) const { return fBuffer[j * fNRows + i]; }
   AFloat &operator()(size_t i, size_t j) { return fBuffer[j * fNRows + i]; }

   /** Return a raw pointer to the elements stored contiguously in column-major
    *  order. */
   AFloat *GetRawDataPointer() { return fBuffer; }
   const AFloat *GetRawDataPointer() const { return fBuffer; }
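
   // A sketch of how the raw pointer and the ones vector can be handed to BLAS
   // (illustrative only; assumes a CBLAS interface such as cblas.h is available,
   // that the matrix holds doubles, and that the ones vector has been sized to
   // at least the number of columns):
   //
   //    TCpuMatrix<Double_t> a(4, 3);   // 4 rows, 3 columns
   //    std::vector<Double_t> rowSums(4);
   //    // rowSums = A * ones, i.e. the sum over the columns of each row of A
   //    cblas_dgemv(CblasColMajor, CblasNoTrans, 4, 3, 1.0, a.GetRawDataPointer(),
   //                4, TCpuMatrix<Double_t>::GetOnePointer(), 1, 0.0,
   //                rowSums.data(), 1);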

   static Executor &GetThreadExecutor() { return TMVA::Config::Instance().GetThreadExecutor(); }

   // static function to get the number of elements per task
   static size_t GetNWorkItems(size_t nelements);

   // print matrix
   void Print() const
   {
      TCpuMatrix cpuMatrix = *this;
      TMVA_DNN_PrintTCpuMatrix(cpuMatrix, "CpuMatrix");
   }

private:
   void Initialize();
};

template <typename AFloat>
std::vector<AFloat> TCpuMatrix<AFloat>::fOnes{};

// Inline Functions.
//______________________________________________________________________________
template <typename AFloat>
size_t TCpuMatrix<AFloat>::GetNWorkItems(size_t nElements)
{
   // Each work item should contain at least minElements elements.
   // const size_t nWorkers = TMVA::Config::Instance().GetNCpu();
   // return (nElements > nWorkers) ? (int) nElements/nWorkers : 1;
   const size_t minElements = 1000;
   const size_t nCpu = TMVA::Config::Instance().GetNCpu();
   if (nElements <= minElements)
      return nElements;
   if (nElements < nCpu * minElements) {
      size_t nt = nElements / minElements;
      return nElements / nt;
   }
   return nElements / nCpu;
   // if (nElements < nCpu*20) return nElements/nCpu;
   // return nElements/(nCpu*10);
}
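
// A quick worked example of the chunking above (assuming, for illustration,
// that Config reports nCpu = 4):
//
//    nElements =    500  ->   500  (<= minElements: one work item, no splitting)
//    nElements =   2500  ->  1250  (nt = 2500/1000 = 2 work items of 1250 each)
//    nElements = 100000  -> 25000  (>= nCpu*minElements: one work item per core)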

//______________________________________________________________________________
template <typename AFloat>
template <typename Function_t>
inline void TCpuMatrix<AFloat>::Map(Function_t &f)
{
   AFloat *data = GetRawDataPointer();
   size_t nelements = GetNoElements();
   size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);

   auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID) {
      size_t jMax = std::min(workerID + nsteps, nelements);
      for (size_t j = workerID; j < jMax; ++j) {
         data[j] = f(data[j]);
      }
      return 0;
   };

   if (nsteps < nelements) {
      TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));

      // for (size_t i = 0; i < nelements; i+=nsteps)
      //    ff(i);

   } else {
      R__ASSERT(nelements == nsteps);
      ff(0);
   }
}
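
// In the parallel branch above, TSeqI(0, nelements, nsteps) produces the start
// indices 0, nsteps, 2*nsteps, ..., and each executor task processes the
// half-open range [workerID, min(workerID + nsteps, nelements)). For example
// (illustrative numbers), nelements = 10 with nsteps = 4 yields tasks starting
// at 0, 4 and 8, covering [0,4), [4,8) and [8,10).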

//______________________________________________________________________________
template <typename AFloat>
template <typename Function_t>
inline void TCpuMatrix<AFloat>::MapFrom(Function_t &f, const TCpuMatrix &A)
{
   AFloat *dataB = GetRawDataPointer();
   const AFloat *dataA = A.GetRawDataPointer();

   size_t nelements = GetNoElements();
   R__ASSERT(nelements == A.GetNoElements());
   size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);

   auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID) {
      size_t jMax = std::min(workerID + nsteps, nelements);
      for (size_t j = workerID; j < jMax; ++j) {
         dataB[j] = f(dataA[j]);
      }
      return 0;
   };
   if (nsteps < nelements) {
      TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
      // for (size_t i = 0; i < nelements; i+=nsteps)
      //    ff(i);

   } else {
      R__ASSERT(nelements == nsteps);
      ff(0);
   }
}
//______________________________________________________________________________
template <typename AFloat>
void TCpuMatrix<AFloat>::Zero()
{
   for (size_t j = 0; j < fNCols; j++) {
      for (size_t i = 0; i < fNRows; i++) {
         (*this)(i, j) = 0;
      }
   }
}

} // namespace DNN
} // namespace TMVA

#endif