|
template<class ValueType > |
using | Basic_Allocator = allocators::Allocator< ValueType, host_tag > |
|
template<class ValueType > |
using | Cuda_Allocator = allocators::Allocator< ValueType, device_tag > |
|
template<class ValueType > |
using | Cuda_Managed = allocators::Device_Managed< ValueType > |
|
using | default_system_tag_t = host_tag |
|
using | size_t = int |
|
template<int dim, class ValueType , class Allocator = tensors::detail::default_allocator<ValueType>> |
using | Tensor = bc::tensors::Tensor_Base< bc::tensors::exprs::Array< bc::Shape< dim >, ValueType, Allocator > > |
|
template<class ValueType , class Allocator = tensors::detail::default_allocator<ValueType>> |
using | Scalar = Tensor< 0, ValueType, Allocator > |
|
template<class ValueType , class Allocator = tensors::detail::default_allocator<ValueType>> |
using | Vector = Tensor< 1, ValueType, Allocator > |
|
template<class ValueType , class Allocator = tensors::detail::default_allocator<ValueType>> |
using | Matrix = Tensor< 2, ValueType, Allocator > |
|
template<class ValueType , class Allocator = tensors::detail::default_allocator<ValueType>> |
using | Cube = Tensor< 3, ValueType, Allocator > |
|
template<class ValueType , class Allocator = tensors::detail::default_allocator<ValueType>> |
using | VecList = bc::tensors::Tensor_Base< bc::tensors::exprs::Vector< ValueType, Allocator > > |
|
|
template<class RM_UNUSED_FUNCTION_WARNING = void> |
void | set_print_stream (std::ostream *ostream) |
|
template<class RM_UNUSED_FUNCTION_WARNING = void> |
std::ostream * | get_print_stream () |
|
template<class RM_UNUSED_FUNCTION_WARNING = void> |
void | set_error_stream (std::ostream *ostream) |
|
template<class RM_UNUSED_FUNCTION_WARNING = void> |
std::ostream * | get_error_stream () |
|
template<class... Ts> |
void | print (const Ts &... args) |
|
template<class... Ts> |
void | printerr (const Ts &... args) |
|
template<class str_type > |
void | bc_assert (bool condition, str_type msg, const char *file, const char *function, int line) |
|
void | BC_cuda_assert (cudaError_t code, const char *file, const char *function, int line) |
|
void | BC_cuda_assert (cublasStatus_t code, const char *file, const char *function, int line) |
|
template<class T > |
const char * | bc_get_classname_of (const T &arg) |
|
template<class Stream , class Indexes , class Image , class ImageOut > |
void | max_pooling_forward (Stream stream, Image image, ImageOut out, Indexes mask, Dim< 2 > krnl_shape, Dim< 2 > padding=Dim< 2 >().fill(0), Dim< 2 > strides={-1,-1}) |
|
template<class Stream , class Indexes , class Image , class ImageOut > |
void | max_pooling_backward (Stream stream, Image image, ImageOut delta, Indexes mask, Dim< 2 > krnl_shape, Dim< 2 > padding=Dim< 2 >().fill(0), Dim< 2 > strides=Dim< 2 >().fill(-1)) |
|
template<class Stream , class ColumnImage , class Image > |
void | im2col (Stream stream, ColumnImage col_image, Image image, bc::Dim< 3 > krnl_shape, bc::Dim< 2 > padding=bc::Dim< 2 >().fill(0), bc::Dim< 2 > strides=bc::Dim< 2 >().fill(1), bc::Dim< 2 > dilation=bc::Dim< 2 >().fill(1), int numb_spatial_axis=2) |
|
template<class Stream , class ColumnImage , class Image > |
void | col2im (Stream stream, ColumnImage col_image, Image image, bc::Dim< 3 > krnl_shape, bc::Dim< 2 > padding=bc::Dim< 2 >(), bc::Dim< 2 > strides=bc::Dim< 2 >().fill(1), bc::Dim< 2 > dilation=bc::Dim< 2 >().fill(1)) |
|
template<class... Integers> |
BCINLINE auto | dim (const Integers &... ints) |
|
template<class... Integers, typename = std::enable_if_t< traits::sequence_of_v<size_t, Integers...>>> |
BCINLINE auto | shape (Integers... ints) |
|
template<class InnerShape , typename = std::enable_if_t<! traits::sequence_of_v<size_t, InnerShape>>> |
BCINLINE auto | shape (InnerShape is) |
|
The Evaluator determines if an expression needs to be greedily optimized.
If so, it attempts to use the left-hand variable (the output) as a cache to reduce temporaries as much as possible. Then, any functions that still require evaluation are replaced with temporaries, and finally the evaluation of the expression is completed with an elementwise evaluator (nd_evaluator).
Example: (assume matrices) y += a * b + c * d
Naively, this expression may generate 2 temporaries, one for each matrix multiplication. However, a more efficient way to evaluate this equation would be to make 2 gemm calls, gemm(y,a,b) and gemm(y, c, d).
This expression reordering also works with more complex calls, such as: y = abs(a * b + c * d).
Here we can apply gemm(y,a,b) and gemm(y,c,d) (the second gemm call updating alpha to 1), followed by evaluating y := abs(y).