9 #ifndef BLACKCAT_COMMON_H_ 10 #define BLACKCAT_COMMON_H_ 12 #include <type_traits> 17 #ifndef BC_DEFAULT_SYSTEM_TAG 18 #define BC_DEFAULT_SYSTEM_TAG host_tag 23 template<
class DerivedTag>
40 static constexpr
bool value = std::is_base_of<system_tag_base, T>::value;
68 #define BC_IF_CUDA(...) __VA_ARGS__ 69 #define BC_IF_NO_CUDA(...) 71 #define BC_IF_CUDA(...) 72 #define BC_IF_NO_CUDA(...) __VA_ARGS__ 78 #define BCHOSTDEV __host__ __device__ 83 #ifdef BC_INLINE_OVERRIDE 84 #define BCINLINE BCHOSTDEV BC_INLINE_OVERRIDE 85 #define BCHOT BC_INLINE_OVERRIDE 87 #if defined(__GNUG__) || defined(__GNUC__) || defined(__clang__) || defined(__cling__) 88 #define BCINLINE BCHOSTDEV inline __attribute__((always_inline)) __attribute__((hot)) //host_device inline 89 #define BCHOT inline __attribute__((always_inline)) __attribute__((hot)) //device-only inline 91 #elif defined(_MSC_VER) 92 #define BCINLINE BCHOSTDEV __forceinline 93 #define BCHOT __forceinline 96 #define BCINLINE BCHOSTDEV inline 105 #define BC_NO_UNIQUE_ADDRESS [[no_unique_address]] 107 #define BC_NO_UNIQUE_ADDRESS 114 #define __PRETTY_FUNCTION__ __FUNCSIG__ 121 static std::ostream* global_output_stream = &std::cout;
123 template<
class RM_UNUSED_FUNCTION_WARNING=
void>
125 global_output_stream = ostream;
128 template<
class RM_UNUSED_FUNCTION_WARNING=
void>
130 return global_output_stream;
133 static std::ostream* global_error_output_stream = &std::cerr;
135 template<
class RM_UNUSED_FUNCTION_WARNING=
void>
137 global_error_output_stream = ostream;
140 template<
class RM_UNUSED_FUNCTION_WARNING=
void>
142 return global_error_output_stream;
148 template<
class T=
char>
151 *os << arg << std::endl;
154 template<
class T,
class... Ts>
155 void print_impl(std::ostream* os,
const T& arg,
const Ts&... args) {
164 template<
class... Ts>
169 template<
class... Ts>
174 template<
class str_type>
175 inline void bc_assert(
bool condition, str_type msg,
const char* file,
const char*
function,
int line) {
179 "\nfunction: ",
function,
185 #define BC_ASSERT(condition, message)\ 186 { bc::bc_assert(condition, message, __FILE__, __PRETTY_FUNCTION__, __LINE__); } 194 #define BC_CUDA_ASSERT(...)\ 195 { BC_cuda_assert((__VA_ARGS__), __FILE__, __PRETTY_FUNCTION__, __LINE__); } 200 const char*
function,
203 if (code != cudaSuccess)
205 bc::printerr(
"BC_CUDA_ASSERT FAILURE: ", cudaGetErrorString(code),
207 "\nfunction: ",
function,
216 const char*
function,
219 if (code != CUBLAS_STATUS_SUCCESS)
222 "cublas error: ", code,
224 "\nfunction: ",
function,
231 #if __has_include(<cudnn.h>) 236 const char*
function,
239 if (code != CUDNN_STATUS_SUCCESS)
241 std::cout <<
"BC_CUBLAS CALL_FAILURE: " <<
242 "cudnn error: " << cudnnGetErrorString(code) <<
243 "\nfile: " << file <<
244 "\nfunction: " <<
function <<
245 "\tline: " << line << std::endl;
256 #if defined(_OPENMP) && !defined(BC_NO_OPENMP) 258 #define BC_omp_parallel__ _Pragma("omp parallel") 259 #define BC_omp_async__(...) BC_omp_parallel__ {_Pragma("omp single nowait") {__VA_ARGS__ } } 260 #define BC_omp_atomic__ _Pragma("omp atomic") 261 #define BC_omp_for__ _Pragma("omp parallel for") 262 #define BC_omp_bar__ _Pragma("omp barrier") 263 #define __BC_CONCAT_REDUCTION_LITERAL(oper, value) omp parallel for reduction(oper:value) 264 #define BC_omp_reduction__(oper, value) BC_omp_for__ reduction(oper:value) 266 #define BC_omp_async__(...) __VA_ARGS__ 267 #define BC_omp_parallel__ 268 #define BC_omp_atomic__ 271 #define BC_omp_for_reduction__(oper, value) 280 #define BC_SIZE_T int 285 static constexpr
bc::size_t MULTITHREAD_THRESHOLD = 16384;
292 static void set_cuda_base_threads(
bc::size_t nthreads) {
293 CUDA_BASE_THREADS = nthreads;
297 return CUDA_BASE_THREADS;
301 return sz > CUDA_BASE_THREADS ? CUDA_BASE_THREADS : sz;
304 static bc::size_t calculate_block_dim(
int size) {
305 return 1 + (int)(size / CUDA_BASE_THREADS);
308 #define BC_CUDA_KERNEL_LOOP_XYZ(i, n, xyz) \ 309 for (int i = blockIdx.xyz * blockDim.xyz + threadIdx.xyz; \ 311 i += blockDim.xyz * gridDim.xyz) 313 #define BC_CUDA_KERNEL_LOOP_X(i, n) BC_CUDA_KERNEL_LOOP_XYZ(i,n,x) 314 #define BC_CUDA_KERNEL_LOOP_Y(i, n) BC_CUDA_KERNEL_LOOP_XYZ(i,n,y) 315 #define BC_CUDA_KERNEL_LOOP_Z(i, n) BC_CUDA_KERNEL_LOOP_XYZ(i,n,z) 321 #if defined(__GNUG__) || defined(__GNUC__) 326 return abi::__cxa_demangle(
typeid(arg).name(),0,0,&status);
331 return typeid(arg).name();
void set_print_stream(std::ostream *ostream)
Definition: common.h:124
std::ostream * get_error_stream()
Definition: common.h:141
const char * bc_get_classname_of(const T &arg)
Definition: common.h:330
void bc_assert(bool condition, str_type msg, const char *file, const char *function, int line)
Definition: common.h:175
#define BC_SIZE_T
Definition: common.h:280
int default_integer_type
Definition: common.h:29
std::ostream * get_print_stream()
Definition: common.h:129
double default_floating_point_type
Definition: common.h:28
int default_integer_type
Definition: common.h:35
int size_t
Definition: common.h:283
void print_impl(std::ostream *os, const T &arg='\n')
Definition: common.h:149
void print_impl(std::ostream *os, const T &arg, const Ts &... args)
Definition: common.h:155
void BC_cuda_assert(cudaError_t code, const char *file, const char *function, int line)
Definition: common.h:197
void print(const Ts &... args)
Definition: common.h:165
void printerr(const Ts &... args)
Definition: common.h:170
#define BC_DEFAULT_SYSTEM_TAG
Definition: common.h:18
The Evaluator determines if an expression needs to be greedily optimized.
Definition: algorithms.h:22
void set_error_stream(std::ostream *ostream)
Definition: common.h:136
float default_floating_point_type
Definition: common.h:34