BlackCat_Tensors
A GPU-supported autograd and linear algebra library, designed for neural network construction
host.h
Go to the documentation of this file.
1 /*
2  * host.h
3  *
4  * Created on: Aug 27, 2019
5  * Author: joseph
6  */
7 
8 #ifndef BLACKCATTENSORS_TENSORS_FUNCTIONS_REDUCTIONS_HOST_REDUCE_H_
9 #define BLACKCATTENSORS_TENSORS_FUNCTIONS_REDUCTIONS_HOST_REDUCE_H_
10 
11 namespace bc {
12 namespace tensors {
13 namespace exprs {
14 namespace functions {
15 
16 template<class>
17 struct Reduce;
18 
19 template<>
20 struct Reduce<bc::host_tag> {
21 
22  template<class Stream, class ScalarOutput, class Expression>
23  static void sum(Stream stream, ScalarOutput output, Expression expression) {
24 
25  auto function = [&]() {
26 
27  using value_type = typename Expression::value_type;
28 
29  value_type& total = output[0];
30  total = 0;
31  #if defined(_OPENMP) && !defined(BC_NO_OPENMP)
32  #pragma omp parallel for reduction(+:total)
33  #endif
34  for (bc::size_t i = 0; i < expression.size(); ++i) {
35  total += expression[i];
36  }
37  };
38  stream.enqueue(function);
39  }
40 };
41 
42 }
43 }
44 }
45 }
46 
47 
48 #endif /* BLACKCATTENSORS_TENSORS_FUNCTIONS_REDUCTIONS_HOST_REDUCE_H_ */
static void sum(Stream stream, ScalarOutput output, Expression expression)
Definition: host.h:23
int size_t
Definition: common.h:283
Definition: common.h:26
Definition: device.h:27
The Evaluator determines if an expression needs to be greedily optimized.
Definition: algorithms.h:22