BlackCat_Tensors
A GPU-supported autograd and linear algebra library, designed for neural network construction
host.h
Go to the documentation of this file.
1 /*
2  * host.h
3  *
4  * Created on: Dec 3, 2018
5  * Author: joseph
6  */
7 
8 #ifndef BC_BLAS_HOST_H_
9 #define BC_BLAS_HOST_H_
10 
/*
 * Optional C-linkage wrapper for the CBLAS header.
 *
 * When BC_BLAS_USE_C_LINKAGE is defined, <cblas.h> is included inside an
 * extern "C" { ... } block; otherwise both helper macros expand to nothing
 * and the header is included as-is.
 */
#ifdef BC_BLAS_USE_C_LINKAGE
#define BC_EXTERN_C_BEGIN extern "C" {
#define BC_EXTERN_C_END }
#else
#define BC_EXTERN_C_BEGIN
#define BC_EXTERN_C_END
#endif

/*
 * Prefer <cblas.h>; fall back to <mkl.h>, which is expected to provide the
 * same cblas_* declarations used further down in this file.
 */
#if __has_include(<cblas.h>)

BC_EXTERN_C_BEGIN
#include <cblas.h>
BC_EXTERN_C_END

#elif __has_include(<mkl.h>)
#include <mkl.h>
#else
/* NOTE(review): the warning is skipped for MSVC, presumably because that
 * compiler does not accept #warning -- confirm. */
#ifndef _MSC_VER
#warning "BLACKCAT_TENSORS REQUIRES A VALID <cblas.h> OR <mkl.h> IN ITS PATH"
#endif
#endif

/* The linkage helpers are only needed for the include above. */
#undef BC_EXTERN_C_BEGIN
#undef BC_EXTERN_C_END
35 
36 namespace bc {
37 namespace blas {
38 
39 template<class SystemTag>
40 struct BLAS;
41 
42 /*
43  * creates a BLAS wrapper for BC_Tensors
44  * -> uses generic function names but without the prefix of s/d for the type.
45  */
46 template<>
47 struct BLAS<host_tag> {
48 
49  /*
50  * a = M x K
51  * b = K x N
52  * c = M x N
53  */
54 
55  template<class Stream>
56  static void gemm(
57  Stream stream, bool transA, bool transB,
59  const float* alpha, const float* A, bc::size_t lda,
60  const float* B, bc::size_t ldb,
61  const float* beta, float* C, bc::size_t ldc)
62  {
63  auto TRANS_A = transA ? CblasTrans : CblasNoTrans;
64  auto TRANS_B = transB ? CblasTrans : CblasNoTrans;
65 
66  stream.enqueue([=]() {
67  cblas_sgemm(
68  CblasColMajor,
69  TRANS_A, TRANS_B, m, n, k,
70  *alpha, A, lda, B, ldb, *beta, C, ldc);
71  });
72  }
73 
74  template<class Stream>
75  static void gemm(
76  Stream stream, bool transA, bool transB,
78  const double *alpha, const double* A, bc::size_t lda,
79  const double* B, bc::size_t ldb,
80  const double *beta, double* C, bc::size_t ldc) {
81 
82  auto TRANS_A = transA ? CblasTrans : CblasNoTrans;
83  auto TRANS_B = transB ? CblasTrans : CblasNoTrans;
84 
85  stream.enqueue([=]() {
86  cblas_dgemm(
87  CblasColMajor,
88  TRANS_A, TRANS_B, m, n, k,
89  *alpha, A, lda, B, ldb, *beta, C, ldc);
90  });
91 
92  }
93 
94  //y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
95  template<class Stream>
96  static void gemv(
97  Stream stream, bool transA,bc::size_t m, bc::size_t n,
98  const double* alpha, const double* A, bc::size_t lda,
99  const double* X, bc::size_t incX,
100  const double* beta, double* Y, bc::size_t incY) {
101 
102  auto TRANS_A = transA ? CblasTrans : CblasNoTrans;
103 
104  stream.enqueue([=]() {
105  cblas_dgemv(
106  CblasColMajor,
107  TRANS_A, m, n,
108  *alpha, A, lda, X, incX, *beta, Y, incY);
109  });
110  }
111 
112  template<class Stream>
113  static void gemv(
114  Stream stream, bool transA, bc::size_t m, bc::size_t n,
115  const float* alpha, const float* A, bc::size_t lda,
116  const float* X, bc::size_t incX,
117  const float* beta, float* Y, bc::size_t incY)
118  {
119  auto TRANS_A = transA ? CblasTrans : CblasNoTrans;
120 
121  stream.enqueue([=]() {
122  cblas_sgemv(
123  CblasColMajor,
124  TRANS_A, m, n,
125  *alpha, A, lda, X, incX, *beta, Y, incY);
126  });
127  }
128 
129  template<class Stream>
130  static void ger(Stream stream, int m, bc::size_t n,
131  const double* alpha,
132  const double* X, bc::size_t incX,
133  const double* Y, bc::size_t incY,
134  double* A, bc::size_t lda) {
135 
136  stream.enqueue([=]() {
137  cblas_dger(
138  CblasColMajor,
139  m, n,
140  *alpha, X, incX, Y, incY, A, lda);
141  });
142  }
143 
144  template<class Stream>
145  static void ger(Stream stream, int m, bc::size_t n,
146  const float* alpha,
147  const float* X, bc::size_t incX,
148  const float* Y, bc::size_t incY,
149  float* A, bc::size_t lda) {
150 
151  stream.enqueue([=]() {
152  cblas_sger(
153  CblasColMajor,
154  m, n,
155  *alpha, X, incX, Y, incY, A, lda);
156  });
157  }
158 
159  template<class Stream>
160  static void dot(Stream stream, int n,
161  double* A,
162  const double* x, bc::size_t incX,
163  const double* y, bc::size_t incY) {
164 
165  stream.enqueue([=]() {
166  *A = cblas_ddot(n, x, incX, y, incY);
167  });
168  }
169 
170  template<class Stream>
171  static void dot(Stream stream, int n,
172  float* A,
173  const float* x, bc::size_t incX,
174  const float* y, bc::size_t incY) {
175 
176  stream.enqueue([=]() {
177  *A = cblas_sdot(n, x, incX, y, incY);
178  });
179  }
180 };
181 }
182 
183 }
184 
185 
186 #endif /* BC_BLAS_HOST_H_ */
static void ger(Stream stream, int m, bc::size_t n, const float *alpha, const float *X, bc::size_t incX, const float *Y, bc::size_t incY, float *A, bc::size_t lda)
Definition: host.h:145
Definition: device.h:17
static void ger(Stream stream, int m, bc::size_t n, const double *alpha, const double *X, bc::size_t incX, const double *Y, bc::size_t incY, double *A, bc::size_t lda)
Definition: host.h:130
static void gemm(Stream stream, bool transA, bool transB, bc::size_t m, bc::size_t n, bc::size_t k, const float *alpha, const float *A, bc::size_t lda, const float *B, bc::size_t ldb, const float *beta, float *C, bc::size_t ldc)
Definition: host.h:56
static void gemv(Stream stream, bool transA, bc::size_t m, bc::size_t n, const float *alpha, const float *A, bc::size_t lda, const float *X, bc::size_t incX, const float *beta, float *Y, bc::size_t incY)
Definition: host.h:113
static void dot(Stream stream, int n, float *A, const float *x, bc::size_t incX, const float *y, bc::size_t incY)
Definition: host.h:171
#define BC_EXTERN_C_BEGIN
Definition: host.h:15
int size_t
Definition: common.h:283
#define BC_EXTERN_C_END
Definition: host.h:16
static void dot(Stream stream, int n, double *A, const double *x, bc::size_t incX, const double *y, bc::size_t incY)
Definition: host.h:160
static void gemm(Stream stream, bool transA, bool transB, bc::size_t m, bc::size_t n, bc::size_t k, const double *alpha, const double *A, bc::size_t lda, const double *B, bc::size_t ldb, const double *beta, double *C, bc::size_t ldc)
Definition: host.h:75
Definition: common.h:26
Definition: device.h:27
static void gemv(Stream stream, bool transA, bc::size_t m, bc::size_t n, const double *alpha, const double *A, bc::size_t lda, const double *X, bc::size_t incX, const double *beta, double *Y, bc::size_t incY)
Definition: host.h:96
The Evaluator determines if an expression needs to be greedily optimized.
Definition: algorithms.h:22