8 #ifndef BC_BLAS_HOST_H_ 9 #define BC_BLAS_HOST_H_ 11 #ifdef BC_BLAS_USE_C_LINKAGE 12 #define BC_EXTERN_C_BEGIN extern "C" { 13 #define BC_EXTERN_C_END } 15 #define BC_EXTERN_C_BEGIN 16 #define BC_EXTERN_C_END 19 #if __has_include(<cblas.h>) 25 #elif __has_include(<mkl.h>) 29 #warning "BLACKCAT_TENSORS REQUIRES A VALID <cblas.h> OR <mkl.h> IN ITS PATH" 33 #undef BC_EXTERN_C_BEGIN 34 #undef BC_EXTERN_C_END 39 template<
class SystemTag>
// gemm, single precision: matrix-matrix product wrapper taking float
// alpha/A/B/beta/C and a Stream on which the work is queued.
// NOTE(review): this listing omits several original lines (the function name,
// part of the parameter list, and the line naming the BLAS routine that is
// called -- presumably cblas_sgemm; confirm against the real header).
55 template<
class Stream>
57 Stream stream,
bool transA,
bool transB,
59 const float* alpha,
const float* A,
bc::size_t lda,
// Translate the boolean transpose flags into the CBLAS transpose enumerators.
63 auto TRANS_A = transA ? CblasTrans : CblasNoTrans;
64 auto TRANS_B = transB ? CblasTrans : CblasNoTrans;
// The BLAS call is handed to stream.enqueue instead of being made inline.
// `[=]` copies the alpha/beta pointers (not the scalars); they are
// dereferenced only when the queued callable runs, so they must still be
// valid at that point. When that happens depends on Stream::enqueue, which is
// not shown here.
66 stream.enqueue([=]() {
69 TRANS_A, TRANS_B, m, n, k,
70 *alpha, A, lda, B, ldb, *beta, C, ldc);
// gemm, double precision: same shape as the float overload, taking double
// alpha/A/beta/C and a Stream on which the work is queued.
// NOTE(review): this listing omits several original lines (the function name,
// the m/n/k and B/ldb parameters, and the line naming the BLAS routine that is
// called -- presumably cblas_dgemm; confirm against the real header).
74 template<
class Stream>
76 Stream stream,
bool transA,
bool transB,
78 const double *alpha,
const double* A,
bc::size_t lda,
80 const double *beta,
double* C,
bc::size_t ldc) {
// Translate the boolean transpose flags into the CBLAS transpose enumerators.
82 auto TRANS_A = transA ? CblasTrans : CblasNoTrans;
83 auto TRANS_B = transB ? CblasTrans : CblasNoTrans;
// The call is queued on the stream. The lambda captures by value, so alpha
// and beta are captured as pointers and read (`*alpha`, `*beta`) only when the
// queued callable executes.
85 stream.enqueue([=]() {
88 TRANS_A, TRANS_B, m, n, k,
89 *alpha, A, lda, B, ldb, *beta, C, ldc);
// gemv, double precision: matrix-vector product wrapper taking double
// alpha/A/beta/Y; the work is queued on a stream.
// NOTE(review): this listing omits several original lines (the function name,
// the stream/transA/m/n and X/incX parameters, and the line naming the BLAS
// routine -- presumably cblas_dgemv; confirm against the real header).
95 template<
class Stream>
98 const double* alpha,
const double* A,
bc::size_t lda,
100 const double* beta,
double* Y,
bc::size_t incY) {
// Translate the boolean transpose flag into the CBLAS transpose enumerator.
102 auto TRANS_A = transA ? CblasTrans : CblasNoTrans;
// Queue the call; alpha and beta are captured as pointers and dereferenced
// inside the lambda, i.e. at the time the queued callable runs.
104 stream.enqueue([=]() {
108 *alpha, A, lda, X, incX, *beta, Y, incY);
// gemv, single precision: float counterpart of the double overload; the work
// is queued on a stream.
// NOTE(review): this listing omits several original lines (the function name,
// most of the parameter list, and the line naming the BLAS routine --
// presumably cblas_sgemv; confirm against the real header).
112 template<
class Stream>
115 const float* alpha,
const float* A,
bc::size_t lda,
// Translate the boolean transpose flag into the CBLAS transpose enumerator.
119 auto TRANS_A = transA ? CblasTrans : CblasNoTrans;
// Queue the call; alpha and beta are captured as pointers and dereferenced
// inside the lambda, i.e. at the time the queued callable runs.
121 stream.enqueue([=]() {
125 *alpha, A, lda, X, incX, *beta, Y, incY);
// ger, double precision: rank-1 update wrapper; the work is queued on a stream.
// NOTE(review): the function name, the whole parameter list and the line
// naming the BLAS routine are missing from this listing (presumably
// cblas_dger; confirm against the real header).
129 template<
class Stream>
// Queue the call. `[=]` captures alpha as a pointer; it is dereferenced when
// the queued callable runs. A is passed last, together with its leading
// dimension lda.
136 stream.enqueue([=]() {
140 *alpha, X, incX, Y, incY, A, lda);
// ger, single precision: float counterpart of the double overload; the work is
// queued on a stream.
// NOTE(review): the function name, the whole parameter list and the line
// naming the BLAS routine are missing from this listing (presumably
// cblas_sger; confirm against the real header).
144 template<
class Stream>
// Queue the call. `[=]` captures alpha as a pointer; it is dereferenced when
// the queued callable runs. A is passed last, together with its leading
// dimension lda.
151 stream.enqueue([=]() {
155 *alpha, X, incX, Y, incY, A, lda);
// dot, double precision: queues a cblas_ddot call on the stream.
// NOTE(review): the function name and parameter list are missing from this
// listing.
159 template<
class Stream>
// The result is not returned: when the queued callable runs, cblas_ddot is
// evaluated over n elements of x (stride incX) and y (stride incY) and its
// value is stored through the output pointer A.
165 stream.enqueue([=]() {
166 *A = cblas_ddot(n, x, incX, y, incY);
// dot, single precision: queues a cblas_sdot call on the stream.
// NOTE(review): the function name and parameter list are missing from this
// listing.
170 template<
class Stream>
// The result is not returned: when the queued callable runs, cblas_sdot is
// evaluated over n elements of x (stride incX) and y (stride incY) and its
// value is stored through the output pointer A.
176 stream.enqueue([=]() {
177 *A = cblas_sdot(n, x, incX, y, incY);
static void ger(Stream stream, int m, bc::size_t n, const float *alpha, const float *X, bc::size_t incX, const float *Y, bc::size_t incY, float *A, bc::size_t lda)
Definition: host.h:145
static void ger(Stream stream, int m, bc::size_t n, const double *alpha, const double *X, bc::size_t incX, const double *Y, bc::size_t incY, double *A, bc::size_t lda)
Definition: host.h:130
static void gemm(Stream stream, bool transA, bool transB, bc::size_t m, bc::size_t n, bc::size_t k, const float *alpha, const float *A, bc::size_t lda, const float *B, bc::size_t ldb, const float *beta, float *C, bc::size_t ldc)
Definition: host.h:56
static void gemv(Stream stream, bool transA, bc::size_t m, bc::size_t n, const float *alpha, const float *A, bc::size_t lda, const float *X, bc::size_t incX, const float *beta, float *Y, bc::size_t incY)
Definition: host.h:113
static void dot(Stream stream, int n, float *A, const float *x, bc::size_t incX, const float *y, bc::size_t incY)
Definition: host.h:171
#define BC_EXTERN_C_BEGIN
Definition: host.h:15
int size_t
Definition: common.h:283
#define BC_EXTERN_C_END
Definition: host.h:16
static void dot(Stream stream, int n, double *A, const double *x, bc::size_t incX, const double *y, bc::size_t incY)
Definition: host.h:160
static void gemm(Stream stream, bool transA, bool transB, bc::size_t m, bc::size_t n, bc::size_t k, const double *alpha, const double *A, bc::size_t lda, const double *B, bc::size_t ldb, const double *beta, double *C, bc::size_t ldc)
Definition: host.h:75
static void gemv(Stream stream, bool transA, bc::size_t m, bc::size_t n, const double *alpha, const double *A, bc::size_t lda, const double *X, bc::size_t incX, const double *beta, double *Y, bc::size_t incY)
Definition: host.h:96
The Evaluator determines if an expression needs to be greedily optimized.
Definition: algorithms.h:22