|
CUB
|
Classes | |
| struct | cub::CachingDeviceAllocator |
| A simple caching allocator for device memory allocations. More... | |
Macros | |
| #define | CUB_LOG_WARP_THREADS(arch) (5) |
| Number of threads per warp (log) | |
| #define | CUB_WARP_THREADS(arch) (1 << CUB_LOG_WARP_THREADS(arch)) |
| Number of threads per warp. | |
| #define | CUB_LOG_SMEM_BANKS(arch) |
| Number of smem banks (log) More... | |
| #define | CUB_SMEM_BANKS(arch) (1 << CUB_LOG_SMEM_BANKS(arch)) |
| Number of smem banks. | |
| #define | CUB_SMEM_BANK_BYTES(arch) (4) |
| Number of bytes per smem bank. | |
| #define | CUB_SMEM_BYTES(arch) |
| Number of smem bytes provisioned per SM. More... | |
| #define | CUB_SMEM_ALLOC_UNIT(arch) |
| Smem allocation size in bytes. More... | |
| #define | CUB_REGS_BY_BLOCK(arch) |
| Whether or not the architecture allocates registers by block (or by warp) More... | |
| #define | CUB_REG_ALLOC_UNIT(arch) |
| Number of registers allocated at a time per block (or by warp) More... | |
| #define | CUB_WARP_ALLOC_UNIT(arch) |
| Granularity of warps for which registers are allocated. More... | |
| #define | CUB_MAX_SM_THREADS(arch) |
| Maximum number of threads per SM. More... | |
| #define | CUB_MAX_SM_BLOCKS(arch) |
| Maximum number of thread blocks per SM. More... | |
| #define | CUB_MAX_BLOCK_THREADS(arch) |
| Maximum number of threads per thread block. More... | |
| #define | CUB_MAX_SM_REGISTERS(arch) |
| Maximum number of registers per SM. More... | |
| #define | CUB_SUBSCRIPTION_FACTOR(arch) |
| Oversubscription factor. More... | |
| #define | CUB_PREFER_CONFLICT_OVER_PADDING(arch) |
| Threshold for preferring padding overhead over X-way conflicts: padding is preferred when X is greater than this value. More... | |
| #define | CubDebug(e) cub::Debug((e), __FILE__, __LINE__) |
| Debug macro. | |
| #define | CubDebugExit(e) if (cub::Debug((e), __FILE__, __LINE__)) { exit(1); } |
| Debug macro with exit. | |
| #define | CubLog(format,...) printf(format,__VA_ARGS__); |
| Log macro for printf statements. | |
Functions | |
| __host__ __device__ __forceinline__ cudaError_t | cub::Debug (cudaError_t error, const char *filename, int line) |
| CUB error reporting function (prints error messages to stderr) More... | |
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::PtxVersion (int &ptx_version) |
| Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) More... | |
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::SmVersion (int &sm_version, int device_ordinal) |
| Retrieves the SM version (major * 100 + minor * 10) | |
| template<typename KernelPtr > | |
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::MaxSmOccupancy (int &max_sm_occupancy, KernelPtr kernel_ptr, int block_threads) |
| Computes the maximum SM occupancy, in thread blocks, for executing the given kernel function pointer kernel_ptr on the current device with block_threads threads per thread block. More... | |
| #define CUB_LOG_SMEM_BANKS | ( | arch) |
Number of smem banks (log)
Definition at line 79 of file util_arch.cuh.
| #define CUB_SMEM_BYTES | ( | arch) |
Number of smem bytes provisioned per SM.
Definition at line 93 of file util_arch.cuh.
| #define CUB_SMEM_ALLOC_UNIT | ( | arch) |
Smem allocation size in bytes.
Definition at line 99 of file util_arch.cuh.
| #define CUB_REGS_BY_BLOCK | ( | arch) |
Whether or not the architecture allocates registers by block (or by warp)
Definition at line 107 of file util_arch.cuh.
| #define CUB_REG_ALLOC_UNIT | ( | arch) |
Number of registers allocated at a time per block (or by warp)
Definition at line 113 of file util_arch.cuh.
| #define CUB_WARP_ALLOC_UNIT | ( | arch) |
Granularity of warps for which registers are allocated.
Definition at line 123 of file util_arch.cuh.
| #define CUB_MAX_SM_THREADS | ( | arch) |
Maximum number of threads per SM.
Definition at line 129 of file util_arch.cuh.
| #define CUB_MAX_SM_BLOCKS | ( | arch) |
Maximum number of thread blocks per SM.
Definition at line 139 of file util_arch.cuh.
| #define CUB_MAX_BLOCK_THREADS | ( | arch) |
Maximum number of threads per thread block.
Definition at line 145 of file util_arch.cuh.
| #define CUB_MAX_SM_REGISTERS | ( | arch) |
Maximum number of registers per SM.
Definition at line 151 of file util_arch.cuh.
| #define CUB_SUBSCRIPTION_FACTOR | ( | arch) |
Oversubscription factor.
Definition at line 161 of file util_arch.cuh.
| #define CUB_PREFER_CONFLICT_OVER_PADDING | ( | arch) |
Threshold for preferring padding overhead over X-way conflicts: padding is preferred when X is greater than this value.
Definition at line 169 of file util_arch.cuh.
| __host__ __device__ __forceinline__ cudaError_t cub::Debug | ( | cudaError_t | error, |
| const char * | filename, | ||
| int | line | ||
| ) |
CUB error reporting function (prints error messages to stderr)
If CUB_STDERR is defined and error is not cudaSuccess, the corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context.
Definition at line 68 of file util_debug.cuh.
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::PtxVersion | ( | int & | ptx_version) |
Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10)
Implementation note: type definition of the EmptyKernel kernel entry point.
Implementation note: forces EmptyKernel<void> to be generated if this class is used.
Definition at line 118 of file util_device.cuh.
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::MaxSmOccupancy | ( | int & | max_sm_occupancy, |
| KernelPtr | kernel_ptr, | ||
| int | block_threads | ||
| ) |
Computes the maximum SM occupancy, in thread blocks, for executing the given kernel function pointer kernel_ptr on the current device with block_threads threads per thread block.
| [out] | max_sm_occupancy | Maximum number of thread blocks that can reside on a single SM |
| [in] | kernel_ptr | Kernel pointer for which to compute SM occupancy |
| [in] | block_threads | Number of threads per thread block |
Definition at line 340 of file util_device.cuh.
1.8.4