17 #ifndef TMVA_DNN_ARCHITECTURES_CPU_CPUTENSOR
18 #define TMVA_DNN_ARCHITECTURES_CPU_CPUTENSOR
// Tensor class for the CPU architecture. It derives from
// TMVA::Experimental::RTensor and uses a TCpuBuffer<AFloat> as the
// container type of the base class. AFloat is the element type.
40 template <
typename AFloat>
41 class TCpuTensor :
public TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>> {
// TCpuMatrix<AFloat> is granted access to the non-public members of this class.
46 friend class TCpuMatrix<AFloat>;
// Type aliases used by the members of this class.
48 using Shape_t =
typename TMVA::Experimental::RTensor<AFloat>::Shape_t;
49 using MemoryLayout = TMVA::Experimental::MemoryLayout;
50 using Matrix_t = TCpuMatrix<AFloat>;
51 using Scalar_t = AFloat;
// Default constructor: the base RTensor is built over a newly created
// TCpuBuffer constructed with the argument 0, and is given the shape {0}.
// NOTE(review): the constructor body is not visible in this excerpt.
54 TCpuTensor(): TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(0), {0})
// Two-dimensional constructor: creates a new TCpuBuffer constructed with
// n * m and gives the tensor the shape {n, m} with the requested memory
// layout (ColumnMajor when the argument is omitted).
// NOTE(review): the constructor body is not visible in this excerpt.
58 TCpuTensor(
size_t n,
size_t m, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
59 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(n * m), {n, m}, memlayout)
63 TCpuTensor(
size_t bsize,
size_t depth,
size_t hw, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
64 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(bsize * depth * hw), {depth, hw, bsize}, memlayout)
66 if (memlayout == MemoryLayout::RowMajor)
67 this->ReshapeInplace({bsize, depth, hw});
71 TCpuTensor(
size_t bsize,
size_t depth,
size_t height,
size_t width,
72 MemoryLayout memlayout = MemoryLayout::ColumnMajor)
73 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(bsize * depth * height * width),
74 {depth, height, width, bsize}, memlayout)
76 if (memlayout == MemoryLayout::RowMajor)
77 this->ReshapeInplace({bsize, depth, height, width});
// Constructor from a shape: a new TCpuBuffer is created with the value
// returned by TMVA::Experimental::Internal::GetSizeFromShape(shape).
// The layout defaults to ColumnMajor.
// NOTE(review): the remainder of the base-class initializer and the
// constructor body are not visible in this excerpt.
81 TCpuTensor(Shape_t shape, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
82 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(TMVA::Experimental::Internal::GetSizeFromShape(shape)),
88 TCpuTensor(AFloat *data,
const Shape_t &shape,
89 MemoryLayout memlayout = MemoryLayout::ColumnMajor)
90 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(TMVA::Experimental::Internal::GetSizeFromShape(shape)), shape, memlayout)
92 auto& container = *(this->GetContainer());
93 for (
size_t i = 0; i < this->GetSize(); ++i) container[i] = data[i];
// Constructor from an existing buffer: the container of the base tensor is a
// TCpuBuffer copy-constructed from `buffer`, with the given shape and layout
// (ColumnMajor by default).
// NOTE(review): whether the copy shares the underlying storage with `buffer`
// depends on the TCpuBuffer copy constructor - confirm there.
100 TCpuTensor(
const TCpuBuffer<AFloat>& buffer, Shape_t shape, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
101 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(buffer), shape, memlayout) {
// The tensor size implied by the shape must not exceed the container size.
102 R__ASSERT(this->GetSize() <= this->GetContainer()->GetSize());
// Constructor from a TCpuMatrix: the container is a TCpuBuffer copy-constructed
// from matrix.GetBuffer() and the initial shape is
// {matrix.GetNrows(), matrix.GetNcols()}. `dim` (default 3) is the requested
// tensor rank; the layout defaults to ColumnMajor.
// NOTE(review): some control-flow lines of the body are not visible in this
// excerpt (the branch separating the two insert calls, and any guard on dim).
// `dim - 2` is an unsigned expression, so it wraps if dim < 2 - confirm that
// the hidden lines guard against that.
109 TCpuTensor(
const TCpuMatrix<AFloat> &matrix,
size_t dim = 3, MemoryLayout memlayout = MemoryLayout::ColumnMajor)
110 : TMVA::Experimental::RTensor<AFloat, TCpuBuffer<AFloat>>(std::make_shared<TCpuBuffer<AFloat>>(matrix.GetBuffer()),{matrix.GetNrows(), matrix.GetNcols()}, memlayout)
// Work on a copy of the current 2D shape and pad it with dim - 2 entries of 1.
114 Shape_t shape = this->GetShape();
116 if (this->GetLayout() == MemoryLayout::ColumnMajor) {
// Column-major: the extra unit dimensions are appended at the end.
117 shape.insert(shape.end(),dim-2, 1);
// Here the extra unit dimensions are inserted at the front instead
// (presumably the row-major branch - the separating line is not visible).
119 shape.insert(shape.begin(), dim - 2, 1);
121 this->ReshapeInplace(shape);
// Conversion to TMatrixT<AFloat>.
// When the shape has two dimensions, or three dimensions with
// GetFirstSize() == 1, the tensor is first obtained as a TCpuMatrix through
// GetMatrix(). Otherwise the result is a TMatrixT built with the arguments
// (1, GetSize(), GetData()).
// NOTE(review): the statement that returns `temp` is not visible in this
// excerpt.
129 operator TMatrixT<AFloat>()
const {
131 if (this->GetShape().size() == 2 || (this->GetShape().size() == 3 && GetFirstSize() == 1)) {
132 TCpuMatrix<AFloat> temp = GetMatrix();
136 return TMatrixT<AFloat>(1, this->GetSize(), this->GetData());
142 AFloat *GetRawDataPointer() {
return *(this->GetContainer()); }
143 const AFloat *GetRawDataPointer()
const {
return *(this->GetContainer()); }
146 const TCpuBuffer<AFloat> & GetDeviceBuffer()
const {
return *(this->GetContainer());}
147 TCpuBuffer<AFloat> & GetDeviceBuffer() {
return *(this->GetContainer());}
150 size_t GetNoElements()
const {
return this->GetSize(); }
156 size_t GetFirstSize()
const
158 auto& shape = this->GetShape();
159 return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape.back() : shape.front();
162 size_t GetCSize()
const
164 auto& shape = this->GetShape();
165 if (shape.size() == 2)
return 1;
166 return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape.front() : shape[1];
// Returns a shape entry selected by the rank of the tensor:
//  - rank 2: shape[0]
//  - rank 3: shape[0] for a column-major tensor, shape[1] otherwise
//  - rank 4 or more: shape[2], independently of the layout
// NOTE(review): no return statement is visible for shapes of rank below 2 -
// confirm what the lines missing from this excerpt do in that case.
169 size_t GetHSize()
const
171 auto& shape = this->GetShape();
172 if (shape.size() == 2)
return shape[0];
173 if (shape.size() == 3)
return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape[0] : shape[1] ;
174 if (shape.size() >= 4)
return shape[2] ;
// Returns a shape entry selected by the rank of the tensor:
//  - rank 2: shape[1]
//  - rank 3: shape[1] for a column-major tensor, shape[2] otherwise
//  - rank 4 or more: shape[3], independently of the layout
// NOTE(review): no return statement is visible for shapes of rank below 2 -
// confirm what the lines missing from this excerpt do in that case.
178 size_t GetWSize()
const
180 auto& shape = this->GetShape();
181 if (shape.size() == 2)
return shape[1];
182 if (shape.size() == 3)
return (this->GetMemoryLayout() == MemoryLayout::ColumnMajor) ? shape[1] : shape[2] ;
183 if (shape.size() >= 4)
return shape[3] ;
191 size_t GetNrows()
const {
return (GetLayout() == MemoryLayout::ColumnMajor ) ? this->GetStrides().back() : this->GetShape().front();}
192 size_t GetNcols()
const {
return (GetLayout() == MemoryLayout::ColumnMajor ) ? this->GetShape().back() : this->GetStrides().front(); }
195 MemoryLayout GetLayout()
const {
return this->GetMemoryLayout(); }
// Returns the content of the tensor as a TCpuMatrix constructed from the
// container of this tensor with GetHSize() and GetWSize() as its dimensions.
// NOTE(review): the declaration of `ndims` and the body of the loop over the
// shape are not visible in this excerpt; presumably the loop counts the
// non-trivial dimensions that the assert then limits to two - confirm.
198 TCpuMatrix<AFloat> GetMatrix()
const
201 auto& shape = this->GetShape();
203 for (
auto& shape_i : shape){
// The shape must have more than one entry and `ndims` must not exceed 2.
208 assert(ndims <= 2 && shape.size() > 1);
209 return TCpuMatrix<AFloat>(*(this->GetContainer()), GetHSize(), GetWSize());
// Creates a copy `x` of this tensor through the copy constructor and reshapes
// that copy in place to `shape`; this tensor itself is not modified (the
// method is const).
// NOTE(review): the return statement is not visible in this excerpt;
// presumably `x` is returned - confirm.
213 TCpuTensor<AFloat> Reshape(Shape_t shape)
const
215 TCpuTensor<AFloat> x(*
this);
216 x.ReshapeInplace(shape);
// Returns the i-th slice of the tensor along its outermost dimension.
// For a row-major tensor the outermost dimension is the first shape entry,
// otherwise it is the last one; the returned tensor has the shape of this
// tensor with that entry removed and keeps the same layout.
// The slice is built on the sub-buffer returned by
// GetSubBuffer(offset, buffsize) of the container.
222 TCpuTensor<AFloat> At(
size_t i)
224 auto &shape = this->GetShape();
225 auto layout = this->GetMemoryLayout();
// Drop the first shape entry (row-major) or the last one (any other layout).
226 Shape_t sliced_shape = (layout == MemoryLayout::RowMajor) ? Shape_t(shape.begin() + 1, shape.end())
227 : Shape_t(shape.begin(), shape.end() - 1);
// The stride of the removed dimension is used as the number of elements of one slice.
229 size_t buffsize = (layout == MemoryLayout::RowMajor) ? this->GetStrides().front() : this->GetStrides().back();
// Slice i starts i slices into the buffer.
230 size_t offset = i * buffsize;
232 return TCpuTensor<AFloat>(this->GetContainer()->GetSubBuffer(offset, buffsize), sliced_shape, layout);
235 TCpuTensor<AFloat> At(
size_t i)
const {
return (
const_cast<TCpuTensor<AFloat> &
>(*
this)).At(i); }
// NOTE(review): the signature of the enclosing member function and the body
// of the loop below are not visible in this excerpt.
// Visible part: obtains a raw AFloat pointer from the container and iterates
// over all GetSize() elements of the tensor.
240 AFloat *data = *(this->GetContainer());
241 for (
size_t i = 0; i < this->GetSize(); ++i)
246 AFloat &operator()(
size_t i,
size_t j)
248 auto &shape = this->GetShape();
249 assert(shape.size() == 2);
250 return (this->GetMemoryLayout() == MemoryLayout::RowMajor) ? (*(this->GetContainer()))[i * shape[1] + j]
251 : (*(this->GetContainer()))[j * shape[0] + i];
// Mutable access to the element (i, j, k) of a three-dimensional tensor.
// The shape must have exactly three entries (checked with assert).
// Row-major: linear index i * shape[1] * shape[2] + j * shape[2] + k.
// Any other layout: linear index i * shape[0] * shape[1] + k * shape[0] + j,
// i.e. i remains the slowest-varying index and j the fastest-varying one.
256 AFloat &operator()(
size_t i,
size_t j,
size_t k)
258 auto &shape = this->GetShape();
259 assert(shape.size() == 3);
261 return (this->GetMemoryLayout() == MemoryLayout::RowMajor)
262 ? (*(this->GetContainer()))[i * shape[1] * shape[2] + j * shape[2] + k]
263 : (*(this->GetContainer()))[i * shape[0] * shape[1] + k * shape[0] + j];
267 AFloat operator()(
size_t i,
size_t j)
const
269 auto &shape = this->GetShape();
270 assert(shape.size() == 2);
271 return (this->GetMemoryLayout() == MemoryLayout::RowMajor) ? (this->GetData())[i * shape[1] + j]
272 : (this->GetData())[j * shape[0] + i];
// Read access (by value) to the element (i, j, k) of a three-dimensional tensor.
// The shape must have exactly three entries (checked with assert).
// Row-major: linear index i * shape[1] * shape[2] + j * shape[2] + k.
// Any other layout: linear index i * shape[0] * shape[1] + k * shape[0] + j,
// i.e. i remains the slowest-varying index and j the fastest-varying one.
275 AFloat operator()(
size_t i,
size_t j,
size_t k)
const
277 auto &shape = this->GetShape();
278 assert(shape.size() == 3);
280 return (this->GetMemoryLayout() == MemoryLayout::RowMajor)
281 ? (this->GetData())[i * shape[1] * shape[2] + j * shape[2] + k]
282 : (this->GetData())[i * shape[0] * shape[1] + k * shape[0] + j];
// Applies the callable f to every element of the tensor in place: each
// element x is replaced by f(x). The definition is given out of line in this
// file.
287 template <
typename Function_t>
288 void Map(Function_t & f);
// Sets every element of this tensor to f applied to the element of A at the
// same linear position. A must have the same number of elements as this
// tensor (asserted in the out-of-line definition in this file).
292 template <
typename Function_t>
293 void MapFrom(Function_t & f,
const TCpuTensor<AFloat> &A);
295 size_t GetBufferUseCount()
const {
return this->GetContainer()->GetUseCount(); }
// Writes all GetSize() elements of the tensor to std::cout, each followed by
// a separator string, and terminates the output with std::endl.
// NOTE(review): `name` (default "Tensor") is not used in the lines visible in
// this excerpt; presumably a hidden line prints a header with it - confirm.
297 void Print(
const char *name =
"Tensor")
const
301 for (
size_t i = 0; i < this->GetSize(); i++)
302 std::cout << (this->GetData())[i] <<
" ";
303 std::cout << std::endl;
305 void PrintShape(
const char *name =
"Tensor")
const
307 std::string memlayout = (GetLayout() == MemoryLayout::RowMajor) ?
"RowMajor" :
"ColMajor";
308 std::cout << name <<
" shape : { ";
309 auto &shape = this->GetShape();
310 for (
size_t i = 0; i < shape.size() - 1; ++i)
311 std::cout << shape[i] <<
" , ";
312 std::cout << shape.back() <<
" } "
313 <<
" Layout : " << memlayout << std::endl;
// Out-of-line definition of Map: replaces every element x of the tensor by f(x).
// The elements are processed in chunks of `nsteps` elements, where nsteps is
// TCpuMatrix<AFloat>::GetNWorkItems(nelements).
// NOTE(review): the end of the lambda and the branch taken when
// nsteps >= nelements are only partly visible in this excerpt.
318 template <
typename AFloat>
319 template <
typename Function_t>
320 inline void TCpuTensor<AFloat>::Map(Function_t &f)
322 AFloat *data = GetRawDataPointer();
323 size_t nelements = GetNoElements();
324 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
// Work item: processes the elements [workerID, min(workerID + nsteps, nelements)).
// The pointer is captured by value; the sizes and f are captured by reference.
326 auto ff = [data, &nsteps, &nelements, &f](UInt_t workerID) {
327 size_t jMax = std::min(workerID + nsteps, nelements);
328 for (
size_t j = workerID; j < jMax; ++j) {
329 data[j] = f(data[j]);
// More than one chunk: hand the chunk start indices 0, nsteps, 2*nsteps, ...
// to the thread executor of the TMVA configuration.
334 if (nsteps < nelements) {
335 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
// Presumably the single-chunk case: here the chunk size must equal the number
// of elements.
341 R__ASSERT(nelements == nsteps);
// Out-of-line definition of MapFrom: sets element j of this tensor to
// f(A[j]) for every linear index j. A must have the same number of elements
// as this tensor (asserted).
// The elements are processed in chunks of `nsteps` elements, where nsteps is
// TCpuMatrix<AFloat>::GetNWorkItems(nelements).
// NOTE(review): the end of the lambda and the branch taken when
// nsteps >= nelements are only partly visible in this excerpt.
347 template <
typename AFloat>
348 template <
typename Function_t>
349 inline void TCpuTensor<AFloat>::MapFrom(Function_t &f,
const TCpuTensor<AFloat> &A)
// dataB is the destination (this tensor), dataA the read-only source (A).
351 AFloat *dataB = GetRawDataPointer();
352 const AFloat *dataA = A.GetRawDataPointer();
354 size_t nelements = GetNoElements();
355 R__ASSERT(nelements == A.GetNoElements());
356 size_t nsteps = TCpuMatrix<AFloat>::GetNWorkItems(nelements);
// Work item: processes the elements [workerID, min(workerID + nsteps, nelements)).
// Everything, including the two pointers, is captured by reference.
358 auto ff = [&dataB, &dataA, &nsteps, &nelements, &f](UInt_t workerID) {
359 size_t jMax = std::min(workerID + nsteps, nelements);
360 for (
size_t j = workerID; j < jMax; ++j) {
361 dataB[j] = f(dataA[j]);
// More than one chunk: hand the chunk start indices 0, nsteps, 2*nsteps, ...
// to the thread executor of the TMVA configuration.
365 if (nsteps < nelements) {
366 TMVA::Config::Instance().GetThreadExecutor().Foreach(ff, ROOT::TSeqI(0, nelements, nsteps));
// Presumably the single-chunk case: here the chunk size must equal the number
// of elements.
371 R__ASSERT(nelements == nsteps);