BlackCat_Tensors
A GPU-supported autograd and linear algebra library, designed for neural network construction
common.h
Go to the documentation of this file.
1 /* Project: BlackCat_Tensors
2  * Author: JosephJaspers
3  * Copyright 2018
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
8 
9 #ifndef BLACKCAT_COMMON_H_
10 #define BLACKCAT_COMMON_H_
11 
#include <type_traits>
#include <typeinfo>
#include <cstdio>

#if defined(__GNUG__) || defined(__GNUC__)
#include <cxxabi.h>
#endif
14 
15 namespace bc {
16 
// Tag used when the user does not select a system explicitly.
// Define BC_DEFAULT_SYSTEM_TAG before including this header to override it.
#ifndef BC_DEFAULT_SYSTEM_TAG
#define BC_DEFAULT_SYSTEM_TAG host_tag
#endif

// Common root of every system tag; is_system_tag tests for this base.
class system_tag_base {};

// CRTP-style intermediate base: each concrete tag derives from
// system_tag_type<itself>, and therefore from system_tag_base.
template<class DerivedTag>
struct system_tag_type: system_tag_base {};

// Tag identifying the host system (presumably CPU memory/execution -- confirm
// against the allocator/stream code that consumes it).
struct host_tag: system_tag_type<host_tag>
{
	using default_floating_point_type = double;
	using default_integer_type = int;
};

// Tag identifying the device system (presumably the CUDA GPU -- confirm
// against the allocator/stream code that consumes it).
struct device_tag : system_tag_type<device_tag>
{
	using default_floating_point_type = float;
	using default_integer_type = int;
};

// Trait: value is true when T derives from (or is) system_tag_base.
template<class T>
struct is_system_tag {
	static constexpr bool value = std::is_base_of<system_tag_base, T>::value;
};

// Variable-template shorthand for is_system_tag<T>::value.
template<class T>
static constexpr bool is_system_tag_v = is_system_tag<T>::value;
45 
47 
48 }
49 
50 /*
51  * This file defines all global macros and compilation switches.
52  */
53 
54 // --------------------------------- compile options --------------------------------- //
55 
56 //#define BC_NO_OPENMP //Disables automatic multi-threading of element-wise operations (if openmp is linked)
57 //#define BC_EXECUTION_POLICIES //Enables execution policies
58 //#define NDEBUG //Disables runtime checks
59 //#define BC_CPP20 //enables C++20 features -- Note: this is reserved for the future, NVCC does not support C++20 features
60 //#define BC_CLING_JIT //Defines certain code based upon if we are using a cling
61 
62 // --------------------------------- override macro-options --------------------------------- //
63 //#define BC_INLINE_OVERRIDE <compiler_attribute> //overrides the default inline attribute
64 //#define BC_SIZE_T <integer_type> //overrides the integer type used for bc::size_t (default is int)
65 
66 // ------------- define if cuda is defined ----------------- //
// BC_IF_CUDA(code)    -> expands to `code` only when __CUDACC__ is defined.
// BC_IF_NO_CUDA(code) -> expands to `code` only when __CUDACC__ is NOT defined.
// Exactly one of the two keeps its arguments; the other expands to nothing.
#if defined(__CUDACC__)
	#define BC_IF_CUDA(...) __VA_ARGS__
	#define BC_IF_NO_CUDA(...)
#else
	#define BC_IF_CUDA(...)
	#define BC_IF_NO_CUDA(...) __VA_ARGS__
#endif
74 
75 // --------------------------------- inline macros -----------------------------------------//
76 
// BCHOSTDEV: marks a function as callable from both host and device when the
// translation unit is built as CUDA code; expands to nothing otherwise.
#if defined(__CUDACC__)
	#define BCHOSTDEV __host__ __device__
#else
	#define BCHOSTDEV
#endif

// BCINLINE: BCHOSTDEV + the strongest inline request available.
// BCHOT:    the same inline request WITHOUT BCHOSTDEV (no __host__/__device__
//           qualifiers are added).
// A user supplied BC_INLINE_OVERRIDE replaces the compiler-specific attribute.
#if defined(BC_INLINE_OVERRIDE)
	#define BCINLINE BCHOSTDEV BC_INLINE_OVERRIDE
	#define BCHOT BC_INLINE_OVERRIDE
#elif defined(__GNUG__) || defined(__GNUC__) || defined(__clang__) || defined(__cling__)
	// GCC-compatible front ends
	#define BCINLINE BCHOSTDEV inline __attribute__((always_inline)) __attribute__((hot))
	#define BCHOT inline __attribute__((always_inline)) __attribute__((hot))
#elif defined(_MSC_VER)
	// Visual Studio
	#define BCINLINE BCHOSTDEV __forceinline
	#define BCHOT __forceinline
#else
	// Unknown compiler: plain inline
	#define BCINLINE BCHOSTDEV inline
	#define BCHOT inline
#endif
100 
101 // --------------------------------- unique address -----------------------------------------//
102 
103 
// BC_NO_UNIQUE_ADDRESS: expands to the [[no_unique_address]] attribute when the
// BC_CPP20 switch is defined, and to nothing otherwise.
#if defined(BC_CPP20)
	#define BC_NO_UNIQUE_ADDRESS [[no_unique_address]]
#else
	#define BC_NO_UNIQUE_ADDRESS
#endif
109 
110 // --------------------------------- asserts -----------------------------------------//
111 
112 // Visual Studio
// Visual Studio: map __PRETTY_FUNCTION__ (used by the assert macros in this
// header) onto __FUNCSIG__.
#if defined(_MSC_VER)
	#define __PRETTY_FUNCTION__ __FUNCSIG__
#endif
116 
117 #include <iostream>
118 
119 namespace bc {
120 
// Stream returned by get_print_stream(); defaults to std::cout.
// Declared `inline` (C++17) so that every translation unit including this
// header shares ONE pointer. With the previous `static` each translation unit
// had a private copy, so set_print_stream() in one file was invisible to the
// others.
inline std::ostream* global_output_stream = &std::cout;

// Redirects the print stream. Passing nullptr is allowed; the pointer is
// stored as-is.
// The dummy template parameter is kept for source compatibility with callers.
template<class RM_UNUSED_FUNCTION_WARNING=void>
inline void set_print_stream(std::ostream* ostream) {
	global_output_stream = ostream;
}

// Returns the current print stream (may be nullptr if it was set to nullptr).
template<class RM_UNUSED_FUNCTION_WARNING=void>
inline std::ostream* get_print_stream() {
	return global_output_stream;
}

// Stream returned by get_error_stream(); defaults to std::cerr.
// `inline` for the same single-instance reason as global_output_stream.
inline std::ostream* global_error_output_stream = &std::cerr;

// Redirects the error stream. Passing nullptr is allowed; the pointer is
// stored as-is.
template<class RM_UNUSED_FUNCTION_WARNING=void>
inline void set_error_stream(std::ostream* ostream) {
	global_error_output_stream = ostream;
}

// Returns the current error stream (may be nullptr if it was set to nullptr).
template<class RM_UNUSED_FUNCTION_WARNING=void>
inline std::ostream* get_error_stream() {
	return global_error_output_stream;
}
144 
145 
namespace detail {

// Terminal case: writes `arg` followed by std::endl (newline + flush).
// When called with no value, `arg` defaults to the character '\n'.
// A null stream is silently ignored.
template<class T=char>
void print_impl(std::ostream* os, const T& arg='\n') {
	if (os != nullptr) {
		(*os) << arg << std::endl;
	}
}

// Recursive case: writes the first value and a single space, then hands the
// remaining values to the next print_impl overload.
// A null stream is silently ignored.
template<class T, class... Ts>
void print_impl(std::ostream* os, const T& arg, const Ts&... args) {
	if (os == nullptr) {
		return;
	}

	(*os) << arg << " ";
	print_impl(os, args...);
}

} // namespace detail
163 
164 template<class... Ts>
165 void print(const Ts&... args) {
167 }
168 
169 template<class... Ts>
170 void printerr(const Ts&... args) {
172 }
173 
174 template<class str_type>
175 inline void bc_assert(bool condition, str_type msg, const char* file, const char* function, int line) {
176  if (!condition) {
177  bc::printerr("BC_ASSERT FAILURE: ",
178  "\nfile: ", file,
179  "\nfunction: ", function,
180  "\nline: ", line,
181  "\nerror: ", msg);
182  throw 1;
183  }
184 }
185 #define BC_ASSERT(condition, message)\
186 { bc::bc_assert(condition, message, __FILE__, __PRETTY_FUNCTION__, __LINE__); }
187 
188 }
189 
#ifdef __CUDACC__
#include <cublas.h>

// cuDNN is optional. Its header is included here, at global scope, so that
// its declarations are not nested inside namespace bc.
#if __has_include(<cudnn.h>)
#include <cudnn.h>
#endif

namespace bc {

// Checks the status returned by a CUDA / cuBLAS / cuDNN call.
// The call is qualified with bc:: so the macro also works outside namespace
// bc (the status types live in the global namespace, so argument-dependent
// lookup would not find the bc overloads).
#define BC_CUDA_ASSERT(...)\
{ bc::BC_cuda_assert((__VA_ARGS__), __FILE__, __PRETTY_FUNCTION__, __LINE__); }

// CUDA runtime overload: when `code` is not cudaSuccess, reports the error
// string plus call-site information through bc::printerr and throws `code`.
inline void BC_cuda_assert(
		cudaError_t code,
		const char *file,
		const char* function,
		int line)
{
	if (code != cudaSuccess)
	{
		bc::printerr("BC_CUDA_ASSERT FAILURE: ", cudaGetErrorString(code),
				"\nfile: ", file,
				"\nfunction: ", function,
				"\nline: ", line);
		throw code;
	}
}

// cuBLAS overload: when `code` is not CUBLAS_STATUS_SUCCESS, reports the
// status value plus call-site information through bc::printerr and throws
// `code`.
inline void BC_cuda_assert(
		cublasStatus_t code,
		const char *file,
		const char* function,
		int line)
{
	if (code != CUBLAS_STATUS_SUCCESS)
	{
		bc::printerr("BC_CUBLAS CALL_FAILURE: ",
				"cublas error: ", code,
				"\nfile: ", file,
				"\nfunction: ", function,
				"\nline: ", line);
		throw code;
	}
}


#if __has_include(<cudnn.h>)
// cuDNN overload: when `code` is not CUDNN_STATUS_SUCCESS, reports the cuDNN
// error string plus call-site information and throws `code`.
// Reported through bc::printerr like the other overloads (it previously wrote
// to std::cout under a "CUBLAS" label).
inline void BC_cuda_assert(
		cudnnStatus_t code,
		const char *file,
		const char* function,
		int line)
{
	if (code != CUDNN_STATUS_SUCCESS)
	{
		bc::printerr("BC_CUDNN CALL_FAILURE: ",
				"cudnn error: ", cudnnGetErrorString(code),
				"\nfile: ", file,
				"\nfunction: ", function,
				"\nline: ", line);
		throw code;
	}
}
#endif
}

#endif
253 
254 // ---------------- openmp macros ---------------- //
255 
// OpenMP helper macros.
// When OpenMP is enabled (and BC_NO_OPENMP is not defined) each macro expands
// to the matching `#pragma omp ...`; otherwise each expands to nothing (or, for
// BC_omp_async__, to its arguments) so annotated code still compiles serially.
//
// Both branches define the SAME set of names. Previously BC_omp_reduction__
// existed only with OpenMP and BC_omp_for_reduction__ only without it, so code
// using either name failed to compile in one of the two configurations.
#if defined(_OPENMP) && !defined(BC_NO_OPENMP)
	#define BC_OPENMP
	// Turns its (already macro-expanded) arguments into a single pragma string.
	#define BC_omp_pragma__(...) _Pragma(#__VA_ARGS__)
	#define BC_omp_parallel__ _Pragma("omp parallel")
	#define BC_omp_async__(...) BC_omp_parallel__ {_Pragma("omp single nowait") {__VA_ARGS__ } }
	#define BC_omp_atomic__ _Pragma("omp atomic")
	#define BC_omp_for__ _Pragma("omp parallel for")
	#define BC_omp_bar__ _Pragma("omp barrier")
	// Kept unchanged for backward compatibility.
	#define __BC_CONCAT_REDUCTION_LITERAL(oper, value) omp parallel for reduction(oper:value)
	// The reduction clause must be part of the pragma text itself; appending it
	// after `_Pragma("omp parallel for")` left stray tokens in the source.
	#define BC_omp_reduction__(oper, value) BC_omp_pragma__(omp parallel for reduction(oper:value))
	#define BC_omp_for_reduction__(oper, value) BC_omp_reduction__(oper, value)
#else
	#define BC_omp_async__(...) __VA_ARGS__
	#define BC_omp_parallel__
	#define BC_omp_atomic__
	#define BC_omp_for__
	#define BC_omp_bar__
	#define BC_omp_reduction__(oper, value)
	#define BC_omp_for_reduction__(oper, value)
#endif
273 
274 // --------------------------------- constants --------------------------------- //
275 
276 namespace bc {
277 
278 
279 #ifndef BC_SIZE_T
280 #define BC_SIZE_T int
281 #endif
282 
283 using size_t = BC_SIZE_T;
284 
285 static constexpr bc::size_t MULTITHREAD_THRESHOLD = 16384;
286 
#ifdef __CUDACC__
// Upper bound on threads per block used by the helpers below; defaults to 512.
// `inline` (C++17) gives the whole program ONE shared value. The previous
// anonymous-namespace `static` created a private copy per translation unit,
// so set_cuda_base_threads() only affected the file that called it.
inline bc::size_t CUDA_BASE_THREADS = 512;

// Replaces the threads-per-block bound. The value is stored unchecked;
// calculate_block_dim divides by it, so it must not be 0.
inline void set_cuda_base_threads(bc::size_t nthreads) {
	CUDA_BASE_THREADS = nthreads;
}

// Returns the current threads-per-block bound.
inline bc::size_t get_cuda_base_threads() {
	return CUDA_BASE_THREADS;
}

// Returns `sz` clamped to at most CUDA_BASE_THREADS.
inline bc::size_t calculate_threads(bc::size_t sz = CUDA_BASE_THREADS) {
	return sz > CUDA_BASE_THREADS ? CUDA_BASE_THREADS : sz;
}

// Returns 1 + size / CUDA_BASE_THREADS (integer division), i.e. always at
// least one block.
inline bc::size_t calculate_block_dim(int size) {
	return 1 + static_cast<int>(size / CUDA_BASE_THREADS);
}

// Kernel-side loop over [start, n): `i` starts at
// blockIdx * blockDim + threadIdx for the chosen dimension and advances by
// blockDim * gridDim, so the loop covers `n` elements regardless of how many
// blocks were launched.
#define BC_CUDA_KERNEL_LOOP_XYZ(i, n, xyz) \
	for (int i = blockIdx.xyz * blockDim.xyz + threadIdx.xyz; \
		i < (n); \
		i += blockDim.xyz * gridDim.xyz)

#define BC_CUDA_KERNEL_LOOP_X(i, n) BC_CUDA_KERNEL_LOOP_XYZ(i,n,x)
#define BC_CUDA_KERNEL_LOOP_Y(i, n) BC_CUDA_KERNEL_LOOP_XYZ(i,n,y)
#define BC_CUDA_KERNEL_LOOP_Z(i, n) BC_CUDA_KERNEL_LOOP_XYZ(i,n,z)

#endif
318 
319 
320 // ------------ classname ------------- //
// Returns a human-readable name for the (dynamic) type of `arg`.
//
// GCC-compatible compilers: the name from typeid is demangled with
// abi::__cxa_demangle. If demangling fails (non-zero status or null result)
// the raw typeid name is returned instead of a null pointer.
// NOTE: a successfully demangled name is a heap buffer allocated by
// __cxa_demangle that is never freed. The `const char*` return type is shared
// with the non-GNU branch (which returns a non-owned pointer), so callers
// cannot know whether to free it; the small leak per call is accepted.
//
// Other compilers: returns typeid(arg).name() unchanged.
//
// <cxxabi.h> and <typeinfo> are included at the top of the header rather than
// here, so their declarations are not pulled into namespace bc.
#if defined(__GNUG__) || defined(__GNUC__)
template<class T>
inline const char* bc_get_classname_of(const T& arg) {
	int status = 0;
	const char* mangled = typeid(arg).name();
	const char* demangled = abi::__cxa_demangle(mangled, nullptr, nullptr, &status);
	return (status == 0 && demangled != nullptr) ? demangled : mangled;
}
#else
template<class T>
inline const char* bc_get_classname_of(const T& arg) {
	return typeid(arg).name();
}
#endif
334 }
335 
336 
337 #endif /* BLACKCAT_COMMON_H_ */
void set_print_stream(std::ostream *ostream)
Definition: common.h:124
std::ostream * get_error_stream()
Definition: common.h:141
const char * bc_get_classname_of(const T &arg)
Definition: common.h:330
void bc_assert(bool condition, str_type msg, const char *file, const char *function, int line)
Definition: common.h:175
Definition: common.h:24
Definition: common.h:39
#define BC_SIZE_T
Definition: common.h:280
int default_integer_type
Definition: common.h:29
std::ostream * get_print_stream()
Definition: common.h:129
Definition: common.h:32
double default_floating_point_type
Definition: common.h:28
int default_integer_type
Definition: common.h:35
int size_t
Definition: common.h:283
void print_impl(std::ostream *os, const T &arg='\n')
Definition: common.h:149
void print_impl(std::ostream *os, const T &arg, const Ts &... args)
Definition: common.h:155
void BC_cuda_assert(cudaError_t code, const char *file, const char *function, int line)
Definition: common.h:197
void print(const Ts &... args)
Definition: common.h:165
void printerr(const Ts &... args)
Definition: common.h:170
#define BC_DEFAULT_SYSTEM_TAG
Definition: common.h:18
Definition: common.h:21
Definition: common.h:26
The Evaluator determines if an expression needs to be greedily optimized.
Definition: algorithms.h:22
void set_error_stream(std::ostream *ostream)
Definition: common.h:136
float default_floating_point_type
Definition: common.h:34