10 #ifndef BC_BLAS_DEVICE_H_ 11 #define BC_BLAS_DEVICE_H_ 16 template<
class SystemTag>
22 template<
class Stream>
30 const float* alpha,
const float* A,
bc::size_t lda,
34 auto TRANS_A = transA ? CUBLAS_OP_T : CUBLAS_OP_N;
35 auto TRANS_B = transB ? CUBLAS_OP_T : CUBLAS_OP_N;
37 stream.enqueue([=]() {
38 cublasHandle_t handle = stream.get_cublas_handle();
41 handle, TRANS_A, TRANS_B,
51 template<
class Stream>
57 const float* alpha,
const float* A,
bc::size_t lda,
61 auto TRANS_A = transA ? CUBLAS_OP_T : CUBLAS_OP_N;
63 stream.enqueue([=]() {
64 cublasHandle_t handle = stream.get_cublas_handle();
74 template<
class Stream>
81 stream.enqueue([=]() {
82 cublasHandle_t handle = stream.get_cublas_handle();
93 template<
class Stream>
101 stream.enqueue([=]() {
102 cublasHandle_t handle = stream.get_cublas_handle();
static void gemm(Stream stream, bool transA, bool transB, bc::size_t m, bc::size_t n, bc::size_t k, const float *alpha, const float *A, bc::size_t lda, const float *B, bc::size_t ldb, const float *beta, float *C, bc::size_t ldc)
Definition: device.h:23
static void gemv(Stream stream, bool transA, bc::size_t m, bc::size_t n, const float *alpha, const float *A, bc::size_t lda, const float *X, bc::size_t incX, const float *beta, float *Y, bc::size_t incY)
Definition: device.h:52
static void ger(Stream stream, int m, bc::size_t n, const float *alpha, const float *X, bc::size_t incX, const float *Y, bc::size_t incY, float *A, bc::size_t lda)
Definition: device.h:75
int size_t
Definition: common.h:283
#define BC_CUDA_ASSERT(...)
Definition: common.h:194
static void dot(Stream stream, int n, float *A, const float *x, bc::size_t incX, const float *y, bc::size_t incY)
Definition: device.h:94
The Evaluator determines if an expression needs to be greedily optimized.
Definition: algorithms.h:22