BlackCat_Tensors
A GPU-supported autograd and linear algebra library, designed for neural network construction
device.h
/* Project: BlackCat_Tensors
 * Author: JosephJaspers
 * Copyright 2018
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#ifdef __CUDACC__
#ifndef BC_BLAS_DEVICE_H_
#define BC_BLAS_DEVICE_H_

namespace bc {
namespace blas {

template<class SystemTag>
struct BLAS;

template<>
struct BLAS<device_tag> {

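	// C := alpha*op(A)*op(B) + beta*C, where op(X) is X or X**T
	// depending on transA/transB (column-major, as in standard BLAS)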
	template<class Stream>
	static void gemm(
			Stream stream,
			bool transA,
			bool transB,
			bc::size_t m,
			bc::size_t n,
			bc::size_t k,
			const float* alpha, const float* A, bc::size_t lda,
			const float* B, bc::size_t ldb,
			const float* beta, float* C, bc::size_t ldc)
	{
		auto TRANS_A = transA ? CUBLAS_OP_T : CUBLAS_OP_N;
		auto TRANS_B = transB ? CUBLAS_OP_T : CUBLAS_OP_N;

		stream.enqueue([=]() {
			cublasHandle_t handle = stream.get_cublas_handle();
			BC_CUDA_ASSERT((cublasSgemm(
					handle, TRANS_A, TRANS_B,
					m, n, k,
					alpha,
					A, lda,
					B, ldb,
					beta, C, ldc)));
		});
	}

	// y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y
	template<class Stream>
	static void gemv(
			Stream stream,
			bool transA,
			bc::size_t m,
			bc::size_t n,
			const float* alpha, const float* A, bc::size_t lda,
			const float* X, bc::size_t incX,
			const float* beta, float* Y, bc::size_t incY)
	{
		auto TRANS_A = transA ? CUBLAS_OP_T : CUBLAS_OP_N;

		stream.enqueue([=]() {
			cublasHandle_t handle = stream.get_cublas_handle();
			BC_CUDA_ASSERT((cublasSgemv(
					handle, TRANS_A,
					m, n,
					alpha, A, lda,
					X, incX, beta,
					Y, incY)));
		});
	}

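	// A := alpha*x*y**T + A (rank-1 update)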
	template<class Stream>
	static void ger(Stream stream, bc::size_t m, bc::size_t n,
			const float* alpha,
			const float* X, bc::size_t incX,
			const float* Y, bc::size_t incY,
			float* A, bc::size_t lda)
	{
		stream.enqueue([=]() {
			cublasHandle_t handle = stream.get_cublas_handle();
			BC_CUDA_ASSERT((cublasSger(
					handle,
					m, n,
					alpha,
					X, incX,
					Y, incY,
					A, lda)));
		});
	}

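	// A := x**T * y; whether the result pointer A refers to host or device
	// memory depends on the handle's cublasSetPointerMode setting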
	template<class Stream>
	static void dot(
			Stream stream,
			bc::size_t n,
			float* A,
			const float* x, bc::size_t incX,
			const float* y, bc::size_t incY)
	{
		stream.enqueue([=]() {
			cublasHandle_t handle = stream.get_cublas_handle();
			BC_CUDA_ASSERT((cublasSdot(handle, n, x, incX, y, incY, A)));
		});
	}
};

} // namespace blas
} // namespace bc


#endif /* BC_BLAS_DEVICE_H_ */
#endif /* __CUDACC__ */
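
For reference, a minimal usage sketch follows. It is not part of the library: the NaiveStream type below is hypothetical, merely satisfying the interface the wrappers rely on (enqueue and get_cublas_handle) while running work synchronously, whereas the library's real stream class presumably defers work onto a CUDA stream. The sketch also assumes that device_tag lives in namespace bc and that this header and its dependency common.h (which supplies BC_CUDA_ASSERT and bc::size_t) are on the include path.

// Hypothetical usage sketch; compile with nvcc so __CUDACC__ is defined.
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <memory>
// #include "device.h" and the library's common.h are assumed available.

// Minimal stand-in for the library's stream type. The wrappers capture the
// stream by value inside their lambdas, so copies must share the handle.
struct NaiveStream {
	std::shared_ptr<cublasHandle_t> handle;

	NaiveStream() : handle(
			new cublasHandle_t,
			[](cublasHandle_t* h) { cublasDestroy(*h); delete h; }) {
		cublasCreate(handle.get());
	}

	cublasHandle_t get_cublas_handle() const { return *handle; }

	template<class Functor>
	void enqueue(Functor task) { task(); } // run immediately, no deferral
};

int main() {
	// Two 2x2 column-major matrices; compute C = 1*A*B + 0*C.
	float hA[] = {1, 2, 3, 4}; // A = [[1,3],[2,4]]
	float hB[] = {5, 6, 7, 8}; // B = [[5,7],[6,8]]
	float hC[4] = {0};

	float *A, *B, *C;
	cudaMalloc(&A, sizeof(hA)); cudaMemcpy(A, hA, sizeof(hA), cudaMemcpyHostToDevice);
	cudaMalloc(&B, sizeof(hB)); cudaMemcpy(B, hB, sizeof(hB), cudaMemcpyHostToDevice);
	cudaMalloc(&C, sizeof(hC)); cudaMemcpy(C, hC, sizeof(hC), cudaMemcpyHostToDevice);

	NaiveStream stream;
	// Host pointers for alpha/beta match the default CUBLAS_POINTER_MODE_HOST.
	float alpha = 1.0f, beta = 0.0f;
	bc::blas::BLAS<bc::device_tag>::gemm(
			stream, /*transA=*/false, /*transB=*/false,
			/*m=*/2, /*n=*/2, /*k=*/2,
			&alpha, A, /*lda=*/2,
			B, /*ldb=*/2,
			&beta, C, /*ldc=*/2);

	cudaMemcpy(hC, C, sizeof(hC), cudaMemcpyDeviceToHost);
	// hC now holds A*B in column-major order: {23, 34, 31, 46}
	cudaFree(A); cudaFree(B); cudaFree(C);
	return 0;
}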