|
CUB
|
Classes | |
| struct | cub::CachingDeviceAllocator |
| A simple caching allocator for device memory allocations. More... | |
Macros | |
| #define | CUB_PTX_ARCH 0 |
| CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass). | |
| #define | CUB_RUNTIME_ENABLED |
| Whether or not the source targeted by the active compiler pass is allowed to invoke device kernels or methods from the CUDA runtime API. | |
| #define | CUB_RUNTIME_FUNCTION __host__ __device__ |
| #define | CUB_LOG_WARP_THREADS(arch) (5) |
| Number of threads per warp (log2) | |
| #define | CUB_WARP_THREADS(arch) (1 << CUB_LOG_WARP_THREADS(arch)) |
| Number of threads per warp. | |
| #define | CUB_LOG_SMEM_BANKS(arch) |
| Number of smem banks (log2) More... | |
| #define | CUB_SMEM_BANKS(arch) (1 << CUB_LOG_SMEM_BANKS(arch)) |
| Number of smem banks. | |
| #define | CUB_SMEM_BANK_BYTES(arch) (4) |
| Number of bytes per smem bank. | |
| #define | CUB_SMEM_BYTES(arch) |
| Number of smem bytes provisioned per SM. More... | |
| #define | CUB_SMEM_ALLOC_UNIT(arch) |
| Smem allocation size in bytes. More... | |
| #define | CUB_REGS_BY_BLOCK(arch) |
| Whether or not the architecture allocates registers by block (or by warp) More... | |
| #define | CUB_REG_ALLOC_UNIT(arch) |
| Number of registers allocated at a time per block (or by warp) More... | |
| #define | CUB_WARP_ALLOC_UNIT(arch) |
| Granularity of warps for which registers are allocated. More... | |
| #define | CUB_MAX_SM_THREADS(arch) |
| Maximum number of threads per SM. More... | |
| #define | CUB_MAX_SM_BLOCKS(arch) |
| Maximum number of thread blocks per SM. More... | |
| #define | CUB_MAX_BLOCK_THREADS(arch) |
| Maximum number of threads per thread block. More... | |
| #define | CUB_MAX_SM_REGISTERS(arch) |
| Maximum number of registers per SM. More... | |
| #define | CUB_SUBSCRIPTION_FACTOR(arch) |
| Oversubscription factor. More... | |
| #define | CUB_PREFER_CONFLICT_OVER_PADDING(arch) |
| Prefer padding overhead vs X-way conflicts greater than this threshold. More... | |
| #define | CubDebug(e) cub::Debug((e), __FILE__, __LINE__) |
| Debug macro. | |
| #define | CubDebugExit(e) if (cub::Debug((e), __FILE__, __LINE__)) { exit(1); } |
| Debug macro with exit. | |
| #define | CubLog(format,...) printf(format,__VA_ARGS__); |
| Log macro for printf statements. | |
Functions | |
| __host__ __device__ __forceinline__ cudaError_t | cub::Debug (cudaError_t error, const char *filename, int line) |
| CUB error reporting function (prints error messages to stderr) More... | |
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::PtxVersion (int &ptx_version) |
| Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) More... | |
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::SmVersion (int &sm_version, int device_ordinal) |
| Retrieves the SM version (major * 100 + minor * 10) | |
| template<typename KernelPtr > | |
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t | cub::MaxSmOccupancy (int &max_sm_occupancy, KernelPtr kernel_ptr, int block_threads) |
| Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block. More... | |
| #define CUB_LOG_SMEM_BANKS | ( | arch) |
Number of smem banks (log2)
Definition at line 78 of file util_arch.cuh.
| #define CUB_SMEM_BYTES | ( | arch) |
Number of smem bytes provisioned per SM.
Definition at line 92 of file util_arch.cuh.
| #define CUB_SMEM_ALLOC_UNIT | ( | arch) |
Smem allocation size in bytes.
Definition at line 98 of file util_arch.cuh.
| #define CUB_REGS_BY_BLOCK | ( | arch) |
Whether or not the architecture allocates registers by block (or by warp)
Definition at line 106 of file util_arch.cuh.
| #define CUB_REG_ALLOC_UNIT | ( | arch) |
Number of registers allocated at a time per block (or by warp)
Definition at line 112 of file util_arch.cuh.
| #define CUB_WARP_ALLOC_UNIT | ( | arch) |
Granularity of warps for which registers are allocated.
Definition at line 122 of file util_arch.cuh.
| #define CUB_MAX_SM_THREADS | ( | arch) |
Maximum number of threads per SM.
Definition at line 128 of file util_arch.cuh.
| #define CUB_MAX_SM_BLOCKS | ( | arch) |
Maximum number of thread blocks per SM.
Definition at line 138 of file util_arch.cuh.
| #define CUB_MAX_BLOCK_THREADS | ( | arch) |
Maximum number of threads per thread block.
Definition at line 144 of file util_arch.cuh.
| #define CUB_MAX_SM_REGISTERS | ( | arch) |
Maximum number of registers per SM.
Definition at line 150 of file util_arch.cuh.
| #define CUB_SUBSCRIPTION_FACTOR | ( | arch) |
Oversubscription factor.
Definition at line 160 of file util_arch.cuh.
| #define CUB_PREFER_CONFLICT_OVER_PADDING | ( | arch) |
Prefer padding overhead vs X-way conflicts greater than this threshold.
Definition at line 168 of file util_arch.cuh.
| __host__ __device__ __forceinline__ cudaError_t cub::Debug | ( | cudaError_t | error, |
| const char * | filename, | ||
| int | line | ||
| ) |
CUB error reporting function (prints error messages to stderr)
If CUB_STDERR is defined and error is not cudaSuccess, the corresponding error message is printed to stderr (or stdout in device code) along with the supplied source context.
Definition at line 68 of file util_debug.cuh.
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::PtxVersion | ( | int & | ptx_version) |
Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10)
Type definition of the EmptyKernel kernel entry point
Force EmptyKernel<void> to be generated if this class is used
Definition at line 118 of file util_device.cuh.
| CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t cub::MaxSmOccupancy | ( | int & | max_sm_occupancy, |
| KernelPtr | kernel_ptr, | ||
| int | block_threads | ||
| ) |
Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer kernel_ptr on the current device with block_threads per thread block.
| [out] | max_sm_occupancy | Maximum number of thread blocks that can reside on a single SM |
| [in] | kernel_ptr | Kernel pointer for which to compute SM occupancy |
| [in] | block_threads | Number of threads per thread block |
Definition at line 334 of file util_device.cuh.
1.8.4