26 template <
typename AFloat>
27 AFloat TCpu<AFloat>::MeanSquaredError(
const TCpuMatrix<AFloat> &Y,
const TCpuMatrix<AFloat> &output,
28 const TCpuMatrix<AFloat> &weights)
30 const AFloat *dataY = Y.GetRawDataPointer();
31 const AFloat *dataOutput = output.GetRawDataPointer();
32 const AFloat *dataWeights = weights.GetRawDataPointer();
33 std::vector<AFloat> temp(Y.GetNoElements());
34 size_t m = Y.GetNrows();
35 AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());
37 auto f = [&dataY, &dataOutput, &dataWeights, &temp, m](UInt_t workerID) {
38 AFloat dy = dataY[workerID] - dataOutput[workerID];
39 temp[workerID] = dataWeights[workerID % m] * dy * dy;
43 auto reduction = [](
const std::vector<AFloat> & v )
45 return std::accumulate(v.begin(),v.end(),AFloat{});
48 Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNoElements()));
49 return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
53 template <
typename AFloat>
54 void TCpu<AFloat>::MeanSquaredErrorGradients(TCpuMatrix<AFloat> &dY,
const TCpuMatrix<AFloat> &Y,
55 const TCpuMatrix<AFloat> &output,
const TCpuMatrix<AFloat> &weights)
58 AFloat *dataDY = dY.GetRawDataPointer();
59 const AFloat *dataY = Y.GetRawDataPointer();
60 const AFloat *dataOutput = output.GetRawDataPointer();
61 const AFloat *dataWeights = weights.GetRawDataPointer();
63 size_t m = Y.GetNrows();
64 AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());
66 auto f = [&dataDY, &dataY, &dataOutput, &dataWeights, m, norm](UInt_t workerID) {
67 dataDY[workerID] = -2.0 * norm * (dataY[workerID] - dataOutput[workerID]);
68 dataDY[workerID] *= dataWeights[workerID % m];
72 Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNoElements()));
76 template <
typename AFloat>
77 AFloat TCpu<AFloat>::CrossEntropy(
const TCpuMatrix<AFloat> &Y,
const TCpuMatrix<AFloat> &output,
78 const TCpuMatrix<AFloat> &weights)
80 const AFloat *dataY = Y.GetRawDataPointer();
81 const AFloat *dataOutput = output.GetRawDataPointer();
82 const AFloat *dataWeights = weights.GetRawDataPointer();
83 std::vector<AFloat> temp(Y.GetNoElements());
85 size_t m = Y.GetNrows();
86 AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());
88 auto f = [&dataY, &dataOutput, &dataWeights, &temp, m](UInt_t workerID) {
89 AFloat y = dataY[workerID];
90 AFloat sig = 1.0 / (1.0 + exp(- dataOutput[workerID]));
92 temp[workerID] = - log(1.0 - sig);
94 temp[workerID] = - log(sig);
96 temp[workerID] = - (y * log(sig) + (1.0 - y) * log(1.0 - sig));
98 temp[workerID] *= dataWeights[workerID % m];
102 auto reduction = [](
const std::vector<AFloat> & v )
104 return std::accumulate(v.begin(),v.end(),AFloat{});
107 Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNoElements()));
108 return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
112 template <
typename AFloat>
113 void TCpu<AFloat>::CrossEntropyGradients(TCpuMatrix<AFloat> &dY,
const TCpuMatrix<AFloat> &Y,
114 const TCpuMatrix<AFloat> &output,
const TCpuMatrix<AFloat> &weights)
116 AFloat *dataDY = dY.GetRawDataPointer();
117 const AFloat *dataY = Y.GetRawDataPointer();
118 const AFloat *dataOutput = output.GetRawDataPointer();
119 const AFloat *dataWeights = weights.GetRawDataPointer();
121 size_t m = Y.GetNrows();
122 AFloat norm = 1.0 / ((AFloat) Y.GetNrows() * Y.GetNcols());
124 auto f = [&dataDY, &dataY, &dataOutput, &dataWeights, m, norm](UInt_t workerID) {
125 AFloat y = dataY[workerID];
126 AFloat sig = 1.0 / (1.0 + exp(- dataOutput[workerID]));
127 dataDY[workerID] = norm * (sig - y);
128 dataDY[workerID] *= dataWeights[workerID % m];
132 Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNoElements()));
136 template <
typename AFloat>
137 AFloat TCpu<AFloat>::SoftmaxCrossEntropy(
const TCpuMatrix<AFloat> &Y,
const TCpuMatrix<AFloat> &output,
138 const TCpuMatrix<AFloat> &weights)
140 const AFloat *dataY = Y.GetRawDataPointer();
141 const AFloat *dataOutput = output.GetRawDataPointer();
142 const AFloat *dataWeights = weights.GetRawDataPointer();
144 std::vector<AFloat> temp(Y.GetNrows());
145 size_t m = Y.GetNrows();
146 size_t n = Y.GetNcols();
147 AFloat norm = 1.0 / ((AFloat) m);
149 auto f = [&dataY, &dataOutput, &dataWeights, &temp, n, m](UInt_t workerID) {
151 for (
size_t j = 0; j < n; j++) {
152 sum += exp(dataOutput[workerID + j * m]);
154 for (
size_t j = 0; j < n; j++) {
156 dataY[workerID + j * m] * log(exp(dataOutput[workerID + j * m]) / sum);
158 temp[workerID] *= dataWeights[workerID];
162 auto reduction = [](
const std::vector<AFloat> & v )
164 return std::accumulate(v.begin(),v.end(),AFloat{});
167 Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNrows()));
168 return norm * Y.GetThreadExecutor().Reduce(temp, reduction);
172 template <
typename AFloat>
173 void TCpu<AFloat>::SoftmaxCrossEntropyGradients(TCpuMatrix<AFloat> &dY,
const TCpuMatrix<AFloat> &Y,
174 const TCpuMatrix<AFloat> &output,
const TCpuMatrix<AFloat> &weights)
176 AFloat *dataDY = dY.GetRawDataPointer();
177 const AFloat *dataY = Y.GetRawDataPointer();
178 const AFloat *dataOutput = output.GetRawDataPointer();
179 const AFloat *dataWeights = weights.GetRawDataPointer();
181 size_t m = Y.GetNrows();
182 size_t n = Y.GetNcols();
183 AFloat norm = 1.0 / ((AFloat) m);
185 auto f = [&dataDY, &dataY, &dataOutput, &dataWeights, norm, n, m](UInt_t workerID) {
188 AFloat weight = dataWeights[workerID];
189 for (
size_t j = 0; j < n; j++) {
190 sum += exp(dataOutput[workerID + j * m]);
191 sumY += dataY[workerID + j * m];
193 for (
size_t j = 0; j < n; j++) {
194 dataDY[workerID + j * m] =
195 norm * (exp(dataOutput[workerID + j * m]) / sum * sumY - dataY[workerID + j * m]);
196 dataDY[workerID + j * m] *= weight;
201 Y.GetThreadExecutor().Map(f, ROOT::TSeqI(Y.GetNrows()));