26 template<
typename AFloat>
27 AFloat TCpu<AFloat>::L1Regularization(
const TCpuMatrix<AFloat> &Weights)
29 const AFloat *data = Weights.GetRawDataPointer();
31 size_t nElements = Weights.GetNoElements();
32 size_t nSteps = TCpuMatrix<AFloat>::GetNWorkItems(nElements);
34 std::vector<AFloat> temp(nElements/nSteps + 1);
36 auto f = [&data, &temp, nElements, nSteps](UInt_t workerID)
38 size_t iMax = std::min(workerID+nSteps, nElements);
39 size_t iWorker = workerID/nSteps;
40 for (
size_t i = workerID; i < iMax; ++i) {
41 temp[iWorker] += fabs(data[i]);
45 auto reduction = [](
const std::vector<AFloat> & v )
47 return std::accumulate(v.begin(),v.end(),AFloat{});
53 Weights.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps) );
54 return Weights.GetThreadExecutor().Reduce(temp, reduction);
// Adds the gradient of an L1 penalty to B, element by element:
//    B[i] += weightDecay * sign(A[i])
// where sign(x) is -1 for x < 0 and +1 otherwise (so a zero entry counts as +1).
// B is modified in place; A is only read.
//
// NOTE(review): the leading numbers on each line look like line-number residue
// from a source listing, and several lines (braces, at least one parameter)
// are not present in this extract.
59 template<
typename AFloat>
60 void TCpu<AFloat>::AddL1RegularizationGradients(
61 TCpuMatrix<AFloat> & B,
62 const TCpuMatrix<AFloat> & A,
// NOTE(review): `weightDecay` is captured by the lambda below but its
// declaration is not visible here; presumably it is the remaining parameter
// of this function -- confirm against the class declaration.
65 AFloat *dataB = B.GetRawDataPointer();
66 const AFloat *dataA = A.GetRawDataPointer();
68 size_t nElements = B.GetNoElements();
// Both matrices must hold the same number of elements, since they are indexed
// in lockstep below.
69 R__ASSERT(A.GetNoElements() == nElements);
// nSteps is used below as the chunk length / stride between chunk starts.
70 size_t nSteps = TCpuMatrix<AFloat>::GetNWorkItems(nElements);
// Chunk kernel: workerID is the index of the first element of the chunk; the
// chunk covers [workerID, min(workerID + nSteps, nElements)).
74 auto f = [&dataA, &dataB, weightDecay, nElements, nSteps](UInt_t workerID)
76 size_t iMax = std::min(workerID+nSteps, nElements);
77 for (
size_t i = workerID; i < iMax; ++i) {
78 AFloat sign = (dataA[i] < 0.0) ? -1.0 : 1.0;
79 dataB[i] += weightDecay * sign;
// More than one chunk: run the kernel for chunk starts 0, nSteps, 2*nSteps, ...
84 if (nSteps < nElements) {
86 B.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements, nSteps));
// NOTE(review): the body of the loop below, and whatever separates it from the
// Foreach call above, are missing from this extract. Presumably this is an
// alternative serial dispatch over the same chunk starts -- verify against the
// complete source before relying on it.
88 for (
size_t i = 0; i < nElements; i+=nSteps)
97 template<
typename AFloat>
98 AFloat TCpu<AFloat>::L2Regularization(
const TCpuMatrix<AFloat> &Weights)
100 const AFloat *data = Weights.GetRawDataPointer();
102 size_t nElements = Weights.GetNoElements();
103 size_t nSteps = TCpuMatrix<AFloat>::GetNWorkItems(nElements);
105 std::vector<AFloat> temp(nElements/nSteps + 1);
107 auto f = [&data, &temp, nElements, nSteps](UInt_t workerID)
109 size_t iMax = std::min(workerID+nSteps, nElements);
110 size_t iWorker = workerID/nSteps;
112 for (
size_t i = workerID; i < iMax; ++i) {
113 temp[iWorker] += data[i] * data[i];
117 auto reduction = [](
const std::vector<AFloat> & v )
119 return std::accumulate(v.begin(),v.end(),AFloat{});
126 Weights.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements,nSteps) );
127 return Weights.GetThreadExecutor().Reduce(temp, reduction);
// Adds the gradient of an L2 penalty to B, element by element:
//    B[i] += 2.0 * weightDecay * A[i]
// B is modified in place; A is only read.
//
// NOTE(review): the leading numbers on each line look like line-number residue
// from a source listing, and several lines (braces, at least one parameter,
// the end of the function) are not present in this extract.
131 template<
typename AFloat>
132 void TCpu<AFloat>::AddL2RegularizationGradients(
133 TCpuMatrix<AFloat> & B,
134 const TCpuMatrix<AFloat> & A,
// NOTE(review): `weightDecay` is captured by the lambda below but its
// declaration is not visible here; presumably it is the remaining parameter
// of this function -- confirm against the class declaration.
137 AFloat *dataB = B.GetRawDataPointer();
138 const AFloat *dataA = A.GetRawDataPointer();
140 size_t nElements = B.GetNoElements();
// Both matrices must hold the same number of elements, since they are indexed
// in lockstep below.
141 R__ASSERT(A.GetNoElements() == nElements);
// nSteps is used below as the chunk length / stride between chunk starts.
142 size_t nSteps = TCpuMatrix<AFloat>::GetNWorkItems(nElements);
// Chunk kernel: workerID is the index of the first element of the chunk; the
// chunk covers [workerID, min(workerID + nSteps, nElements)).
144 auto f = [&dataA, &dataB, weightDecay, nElements, nSteps](UInt_t workerID)
146 size_t iMax = std::min(workerID+nSteps, nElements);
147 for (
size_t i = workerID; i < iMax; ++i) {
148 dataB[i] += 2.0 * weightDecay * dataA[i];
// More than one chunk: run the kernel for chunk starts 0, nSteps, 2*nSteps, ...
153 if (nSteps < nElements) {
155 B.GetThreadExecutor().Foreach(f, ROOT::TSeqI(0,nElements, nSteps));
// NOTE(review): the body of the loop below, and whatever separates it from the
// Foreach call above, are missing from this extract. Presumably this is an
// alternative serial dispatch over the same chunk starts -- verify against the
// complete source before relying on it.
157 for (
size_t i = 0; i < nElements; i+=nSteps)