12 #ifndef ROOT_TThreadExecutor
13 #define ROOT_TThreadExecutor
15 #include "RConfigure.h"
20 # if !defined(__ROOTCLING__) && !defined(G__DICTIONARY)
21 # error "Cannot use ROOT::TThreadExecutor without defining R__USE_IMT."
/// Executor whose work is dispatched through the ParallelFor / ParallelReduce
/// members declared below; it keeps a shared ROOT::Internal::TPoolManager in
/// fSched. Inherits from TExecutor<TThreadExecutor> and re-exports that base's
/// Map, MapReduce and Reduce overloads alongside its own.
/// NOTE(review): access specifiers, the template header of the first Foreach
/// overload and the closing brace of the class do not appear in this listing.
35 class TThreadExecutor:
public TExecutor<TThreadExecutor> {
/// Default constructor.
37 explicit TThreadExecutor();
/// Construct from a thread count (presumably the pool size -- confirm in the implementation file).
39 explicit TThreadExecutor(UInt_t nThreads);
/// Copying is disabled: both copy construction and copy assignment are deleted.
41 TThreadExecutor(TThreadExecutor &) =
delete;
42 TThreadExecutor &operator=(TThreadExecutor &) =
delete;
/// Foreach overloads: run func for its side effects, nothing is returned.
/// Inputs are a repetition count, a ROOT::TSeq, an initializer_list or a
/// (const) vector. nChunks defaults to 0.
/// NOTE(review): 0 presumably selects the un-chunked dispatch; the guarding
/// condition is not visible in the definitions of this listing.
45 void Foreach(F func,
unsigned nTimes,
unsigned nChunks = 0);
46 template<
class F,
class INTEGER>
47 void Foreach(F func, ROOT::TSeq<INTEGER> args,
unsigned nChunks = 0);
49 template<
class F,
class T>
50 void Foreach(F func, std::initializer_list<T> args,
unsigned nChunks = 0);
52 template<
class F,
class T>
53 void Foreach(F func, std::vector<T> &args,
unsigned nChunks = 0);
54 template<
class F,
class T>
55 void Foreach(F func,
const std::vector<T> &args,
unsigned nChunks = 0);
/// Map overloads: return a vector of func's result type. The base-class
/// overloads stay visible through the using-declaration.
/// NOTE(review): Cond = noReferenceCond<...> is presumably a compile-time guard
/// on func's return type; its definition is not part of this listing.
57 using TExecutor<TThreadExecutor>::Map;
58 template<
class F,
class Cond = noReferenceCond<F>>
59 auto Map(F func,
unsigned nTimes) -> std::vector<typename std::result_of<F()>::type>;
60 template<
class F,
class INTEGER,
class Cond = noReferenceCond<F, INTEGER>>
61 auto Map(F func, ROOT::TSeq<INTEGER> args) -> std::vector<typename std::result_of<F(INTEGER)>::type>;
62 template<
class F,
class T,
class Cond = noReferenceCond<F, T>>
63 auto Map(F func, std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type>;
/// MapReduce overloads: return a single value of func's result type, obtained
/// in the definitions by calling Reduce on the output of the matching Map.
/// Overloads taking nChunks forward it to the chunked Map.
69 using TExecutor<TThreadExecutor>::MapReduce;
70 template<
class F,
class R,
class Cond = noReferenceCond<F>>
71 auto MapReduce(F func,
unsigned nTimes, R redfunc) ->
typename std::result_of<F()>::type;
72 template<
class F,
class R,
class Cond = noReferenceCond<F>>
73 auto MapReduce(F func,
unsigned nTimes, R redfunc,
unsigned nChunks) ->
typename std::result_of<F()>::type;
74 template<
class F,
class INTEGER,
class R,
class Cond = noReferenceCond<F, INTEGER>>
75 auto MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc,
unsigned nChunks) ->
typename std::result_of<F(INTEGER)>::type;
77 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
78 auto MapReduce(F func, std::initializer_list<T> args, R redfunc,
unsigned nChunks) ->
typename std::result_of<F(T)>::type;
80 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
81 auto MapReduce(F func, std::vector<T> &args, R redfunc) ->
typename std::result_of<F(T)>::type;
82 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
83 auto MapReduce(F func, std::vector<T> &args, R redfunc,
unsigned nChunks) ->
typename std::result_of<F(T)>::type;
/// Reduce overloads, told apart by the trailing return type: the first needs
/// redfunc to be callable with two elements (its definition forwards to
/// ParallelReduce), the second needs redfunc to be callable with the whole
/// vector (its definition forwards to SeqReduce).
85 using TExecutor<TThreadExecutor>::Reduce;
86 template<
class T,
class BINARYOP>
auto Reduce(
const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()));
87 template<
class T,
class R>
auto Reduce(
const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
/// Accessor for the pool size (defined outside this listing).
89 unsigned GetPoolSize();
/// Chunked Map overloads: the definitions reduce each chunk with redfunc, so
/// the returned vector holds one partial result per chunk.
92 template<
class F,
class R,
class Cond = noReferenceCond<F>>
93 auto Map(F func,
unsigned nTimes, R redfunc,
unsigned nChunks) -> std::vector<typename std::result_of<F()>::type>;
94 template<
class F,
class INTEGER,
class R,
class Cond = noReferenceCond<F, INTEGER>>
95 auto Map(F func, ROOT::TSeq<INTEGER> args, R redfunc,
unsigned nChunks) -> std::vector<typename std::result_of<F(INTEGER)>::type>;
96 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
97 auto Map(F func, std::vector<T> &args, R redfunc,
unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
98 template<
class F,
class T,
class R,
class Cond = noReferenceCond<F, T>>
99 auto Map(F func, std::initializer_list<T> args, R redfunc,
unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type>;
/// Non-template back-end entry points (defined outside this listing).
/// ParallelFor receives a start, an end, a step and a functor taking an index.
/// NOTE(review): the callers here assume f is invoked for start, start+step,
/// ... below end -- confirm against the implementation.
102 void ParallelFor(
unsigned start,
unsigned end,
unsigned step,
const std::function<
void(
unsigned int i)> &f);
/// ParallelReduce is declared only for vectors of double and of float, with a
/// binary std::function as the combining operation.
103 double ParallelReduce(
const std::vector<double> &objs,
const std::function<
double(
double a,
double b)> &redfunc);
104 float ParallelReduce(
const std::vector<float> &objs,
const std::function<
float(
float a,
float b)> &redfunc);
/// SeqReduce hands the whole vector to redfunc and returns its result.
105 template<
class T,
class R>
106 auto SeqReduce(
const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs));
/// Shared handle to the pool manager; null by default.
108 std::shared_ptr<ROOT::Internal::TPoolManager> fSched =
nullptr;
// Foreach(func, nTimes, nChunks): run func through ParallelFor over [0, nTimes).
// Two dispatches are visible: one with step 1 that calls func() directly, and a
// chunked one where each task loops over up to `step` consecutive indices.
// NOTE(review): the template header, the condition selecting between the two
// dispatches (presumably nChunks == 0), the inner loop body and the closing
// braces are missing from this listing.
118 void TThreadExecutor::Foreach(F func,
unsigned nTimes,
unsigned nChunks) {
// Un-chunked dispatch: the index handed to the functor is ignored.
120 ParallelFor(0U, nTimes, 1, [&](
unsigned int){func();});
// Indices per chunk: nTimes / nChunks rounded up.
124 unsigned step = (nTimes + nChunks - 1) / nChunks;
// Task for the chunk that starts at index i.
125 auto lambda = [&](
unsigned int i)
// Bounded by both the chunk width and nTimes, so the last chunk may be shorter.
127 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
// Chunked dispatch: consecutive tasks start `step` indices apart.
131 ParallelFor(0U, nTimes, step, lambda);
137 template<
class F,
class INTEGER>
138 void TThreadExecutor::Foreach(F func, ROOT::TSeq<INTEGER> args,
unsigned nChunks) {
140 ParallelFor(*args.begin(), *args.end(), args.step(), [&](
unsigned int i){func(i);});
143 unsigned start = *args.begin();
144 unsigned end = *args.end();
145 unsigned seqStep = args.step();
146 unsigned step = (end - start + nChunks - 1) / nChunks;
148 auto lambda = [&](
unsigned int i)
150 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
154 ParallelFor(start, end, step, lambda);
161 template<
class F,
class T>
162 void TThreadExecutor::Foreach(F func, std::initializer_list<T> args,
unsigned nChunks) {
163 std::vector<T> vargs(std::move(args));
164 Foreach(func, vargs, nChunks);
// Foreach(func, args, nChunks) for a mutable vector: run func on the elements
// of args through ParallelFor, either one element per task or in chunks.
// NOTE(review): the condition selecting between the two dispatches (presumably
// nChunks == 0), the inner loop body and the closing braces are missing from
// this listing.
171 template<
class F,
class T>
172 void TThreadExecutor::Foreach(F func, std::vector<T> &args,
unsigned nChunks) {
// Element count, narrowed from size_t to unsigned int.
173 unsigned int nToProcess = args.size();
// Un-chunked dispatch: task i handles args[i].
175 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
// Elements per chunk: nToProcess / nChunks rounded up.
179 unsigned step = (nToProcess + nChunks - 1) / nChunks;
// Task for the chunk that starts at element i.
180 auto lambda = [&](
unsigned int i)
// Bounded by both the chunk width and the element count.
182 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
// Chunked dispatch: consecutive tasks start `step` elements apart.
186 ParallelFor(0U, nToProcess, step, lambda);
// Foreach(func, args, nChunks) for a const vector: same visible structure as
// the mutable-vector overload, but args is only read.
// NOTE(review): the condition selecting between the two dispatches (presumably
// nChunks == 0), the inner loop body and the closing braces are missing from
// this listing.
191 template<
class F,
class T>
192 void TThreadExecutor::Foreach(F func,
const std::vector<T> &args,
unsigned nChunks) {
// Element count, narrowed from size_t to unsigned int.
193 unsigned int nToProcess = args.size();
// Un-chunked dispatch: task i handles args[i].
195 ParallelFor(0U, nToProcess, 1, [&](
unsigned int i){func(args[i]);});
// Elements per chunk: nToProcess / nChunks rounded up.
199 unsigned step = (nToProcess + nChunks - 1) / nChunks;
// Task for the chunk that starts at element i.
200 auto lambda = [&](
unsigned int i)
// Bounded by both the chunk width and the element count.
202 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
// Chunked dispatch: consecutive tasks start `step` elements apart.
206 ParallelFor(0U, nToProcess, step, lambda);
// Map(func, nTimes): dispatch a task per index in [0, nTimes) through
// ParallelFor, with a result vector of nTimes slots.
// NOTE(review): the lambda body (presumably reslist[i] = func()), the return
// statement and the closing braces are missing from this listing.
214 template<
class F,
class Cond>
215 auto TThreadExecutor::Map(F func,
unsigned nTimes) -> std::vector<typename std::result_of<F()>::type> {
// Element type is whatever a call to func() yields.
216 using retType = decltype(func());
// Sized up front to nTimes value-initialised elements.
217 std::vector<retType> reslist(nTimes);
// Task body, invoked with the index i.
218 auto lambda = [&](
unsigned int i)
// One task per index, step 1.
222 ParallelFor(0U, nTimes, 1, lambda);
231 template<
class F,
class INTEGER,
class Cond>
232 auto TThreadExecutor::Map(F func, ROOT::TSeq<INTEGER> args) -> std::vector<typename std::result_of<F(INTEGER)>::type> {
233 unsigned start = *args.begin();
234 unsigned end = *args.end();
235 unsigned seqStep = args.step();
237 using retType = decltype(func(start));
238 std::vector<retType> reslist(args.size());
239 auto lambda = [&](
unsigned int i)
241 reslist[i] = func(i);
243 ParallelFor(start, end, seqStep, lambda);
252 template<
class F,
class R,
class Cond>
253 auto TThreadExecutor::Map(F func,
unsigned nTimes, R redfunc,
unsigned nChunks) -> std::vector<typename std::result_of<F()>::type> {
256 return Map(func, nTimes);
259 unsigned step = (nTimes + nChunks - 1) / nChunks;
261 unsigned actualChunks = (nTimes + step - 1) / step;
262 using retType = decltype(func());
263 std::vector<retType> reslist(actualChunks);
264 auto lambda = [&](
unsigned int i)
266 std::vector<retType> partialResults(std::min(nTimes-i, step));
267 for (
unsigned j = 0; j < step && (i + j) < nTimes; j++) {
268 partialResults[j] = func();
270 reslist[i / step] = Reduce(partialResults, redfunc);
272 ParallelFor(0U, nTimes, step, lambda);
// Map(func, args) for a vector: store func(args[i]) in slot i of a result
// vector that has one slot per input element, one ParallelFor task per element.
// NOTE(review): the lambda's braces, the return statement and the closing
// brace of the function are missing from this listing.
283 template<
class F,
class T,
class Cond>
284 auto TThreadExecutor::Map(F func, std::vector<T> &args) -> std::vector<typename std::result_of<F(T)>::type> {
// Element type is whatever func yields for an element of args (decltype is unevaluated).
286 using retType = decltype(func(args.front()));
// Element count, narrowed from size_t to unsigned int.
288 unsigned int nToProcess = args.size();
289 std::vector<retType> reslist(nToProcess);
// Task body: each index writes only its own slot.
291 auto lambda = [&](
unsigned int i)
293 reslist[i] = func(args[i]);
// One task per element, step 1.
296 ParallelFor(0U, nToProcess, 1, lambda);
306 template<
class F,
class INTEGER,
class R,
class Cond>
307 auto TThreadExecutor::Map(F func, ROOT::TSeq<INTEGER> args, R redfunc,
unsigned nChunks) -> std::vector<typename std::result_of<F(INTEGER)>::type> {
310 return Map(func, args);
313 unsigned start = *args.begin();
314 unsigned end = *args.end();
315 unsigned seqStep = args.step();
316 unsigned step = (end - start + nChunks - 1) / nChunks;
318 unsigned actualChunks = (end - start + step - 1) / step;
320 using retType = decltype(func(start));
321 std::vector<retType> reslist(actualChunks);
322 auto lambda = [&](
unsigned int i)
324 std::vector<retType> partialResults(std::min(end-i, step));
325 for (
unsigned j = 0; j < step && (i + j) < end; j+=seqStep) {
326 partialResults[j] = func(i + j);
328 reslist[i / step] = Reduce(partialResults, redfunc);
330 ParallelFor(start, end, step, lambda);
341 template<
class F,
class T,
class R,
class Cond>
342 auto TThreadExecutor::Map(F func, std::vector<T> &args, R redfunc,
unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
345 return Map(func, args);
348 unsigned int nToProcess = args.size();
349 unsigned step = (nToProcess + nChunks - 1) / nChunks;
351 unsigned actualChunks = (nToProcess + step - 1) / step;
353 using retType = decltype(func(args.front()));
354 std::vector<retType> reslist(actualChunks);
355 auto lambda = [&](
unsigned int i)
357 std::vector<T> partialResults(step);
358 for (
unsigned j = 0; j < step && (i + j) < nToProcess; j++) {
359 partialResults[j] = func(args[i + j]);
361 reslist[i / step] = Reduce(partialResults, redfunc);
364 ParallelFor(0U, nToProcess, step, lambda);
374 template<
class F,
class T,
class R,
class Cond>
375 auto TThreadExecutor::Map(F func, std::initializer_list<T> args, R redfunc,
unsigned nChunks) -> std::vector<typename std::result_of<F(T)>::type> {
376 std::vector<T> vargs(std::move(args));
377 const auto &reslist = Map(func, vargs, redfunc, nChunks);
390 template<
class F,
class R,
class Cond>
391 auto TThreadExecutor::MapReduce(F func,
unsigned nTimes, R redfunc) ->
typename std::result_of<F()>::type {
392 return Reduce(Map(func, nTimes), redfunc);
395 template<
class F,
class R,
class Cond>
396 auto TThreadExecutor::MapReduce(F func,
unsigned nTimes, R redfunc,
unsigned nChunks) ->
typename std::result_of<F()>::type {
397 return Reduce(Map(func, nTimes, redfunc, nChunks), redfunc);
400 template<
class F,
class INTEGER,
class R,
class Cond>
401 auto TThreadExecutor::MapReduce(F func, ROOT::TSeq<INTEGER> args, R redfunc,
unsigned nChunks) ->
typename std::result_of<F(INTEGER)>::type {
402 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
405 template<
class F,
class T,
class R,
class Cond>
406 auto TThreadExecutor::MapReduce(F func, std::initializer_list<T> args, R redfunc,
unsigned nChunks) ->
typename std::result_of<F(T)>::type {
407 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
411 template<
class F,
class T,
class R,
class Cond>
412 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc) ->
typename std::result_of<F(T)>::type {
413 return Reduce(Map(func, args), redfunc);
416 template<
class F,
class T,
class R,
class Cond>
417 auto TThreadExecutor::MapReduce(F func, std::vector<T> &args, R redfunc,
unsigned nChunks) ->
typename std::result_of<F(T)>::type {
418 return Reduce(Map(func, args, redfunc, nChunks), redfunc);
424 template<
class T,
class BINARYOP>
425 auto TThreadExecutor::Reduce(
const std::vector<T> &objs, BINARYOP redfunc) -> decltype(redfunc(objs.front(), objs.front()))
428 static_assert(std::is_same<decltype(redfunc(objs.front(), objs.front())), T>::value,
"redfunc does not have the correct signature");
429 return ParallelReduce(objs, redfunc);
435 template<
class T,
class R>
436 auto TThreadExecutor::Reduce(
const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
439 static_assert(std::is_same<decltype(redfunc(objs)), T>::value,
"redfunc does not have the correct signature");
440 return SeqReduce(objs, redfunc);
443 template<
class T,
class R>
444 auto TThreadExecutor::SeqReduce(
const std::vector<T> &objs, R redfunc) -> decltype(redfunc(objs))
446 return redfunc(objs);