CUB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
util_arch.cuh
Go to the documentation of this file.
1 /******************************************************************************
2  * Copyright (c) 2011, Duane Merrill. All rights reserved.
3  * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of the NVIDIA CORPORATION nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  ******************************************************************************/
28 
34 #pragma once
35 
36 #include "util_namespace.cuh"
37 
39 CUB_NS_PREFIX
40 
42 namespace cub {
43 
44 
#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

// CUB_PTX_ARCH: the value of __CUDA_ARCH__ for the current compilation pass,
// or 0 when __CUDA_ARCH__ is not defined.
#ifndef __CUDA_ARCH__
    #define CUB_PTX_ARCH 0
#else
    #define CUB_PTX_ARCH __CUDA_ARCH__
#endif

// CUB_RUNTIME_FUNCTION: execution-space qualifiers selected per compilation pass.
//   - __CUDA_ARCH__ undefined, or __CUDA_ARCH__ >= 350 with __CUDACC_RDC__
//     defined: CUB_RUNTIME_ENABLED is defined and the qualifiers are
//     "__host__ __device__".
//   - otherwise: CUB_RUNTIME_ENABLED is left undefined and the qualifier is
//     "__host__" only.
// NOTE(review): the 350 / __CUDACC_RDC__ condition presumably tracks where the
// CUDA runtime API is callable from device code -- confirm against the callers.
#if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__ >= 350 && defined(__CUDACC_RDC__))
    #define CUB_RUNTIME_ENABLED
    #define CUB_RUNTIME_FUNCTION __host__ __device__
#else
    #define CUB_RUNTIME_FUNCTION __host__
#endif

#endif  // DOXYGEN_SHOULD_SKIP_THIS
68 
69 
/// Base-2 logarithm of the warp width: expands to 5 for every `arch`
/// (CUB_WARP_THREADS shifts 1 left by this amount).  The parameter is
/// accepted but unused, which keeps the call shape uniform with the other
/// arch-indexed macros in this file.
#define CUB_LOG_WARP_THREADS(arch)                      \
    (5)
73 
/// Warp width in threads for `arch`: 1 shifted left by
/// CUB_LOG_WARP_THREADS(arch).
#define CUB_WARP_THREADS(arch)                          \
    (1 << CUB_LOG_WARP_THREADS(arch))
77 
/// Base-2 logarithm of the shared-memory bank count (CUB_SMEM_BANKS shifts 1
/// left by this amount): 5 when arch >= 200, otherwise 4.
///
/// `arch` is a compute-capability code in the __CUDA_ARCH__ numbering
/// (e.g. 350).  It is parenthesized in the expansion so that an argument
/// containing low-precedence operators is compared as a whole.
#define CUB_LOG_SMEM_BANKS(arch)                        \
    (((arch) >= 200) ?                                  \
        (5) :                                           \
        (4))
83 
/// Number of shared-memory banks for `arch`: 1 shifted left by
/// CUB_LOG_SMEM_BANKS(arch).
#define CUB_SMEM_BANKS(arch)                            \
    (1 << CUB_LOG_SMEM_BANKS(arch))
87 
/// Expands to 4 for every `arch` (the parameter is accepted but unused).
/// Going by the name, this is the width of one shared-memory bank in bytes.
#define CUB_SMEM_BANK_BYTES(arch)                       \
    (4)
91 
/// Shared-memory size in bytes selected by `arch`: 48 * 1024 when
/// arch >= 200, otherwise 16 * 1024.
/// NOTE(review): presumably the per-block capacity -- the scope is not
/// stated in this file; confirm before relying on it.
///
/// `arch` is parenthesized so that compound argument expressions are
/// compared as a whole.
#define CUB_SMEM_BYTES(arch)                            \
    (((arch) >= 200) ?                                  \
        (48 * 1024) :                                   \
        (16 * 1024))
97 
/// Value selected by `arch`: 256 when arch >= 300, 128 when
/// 200 <= arch < 300, and 512 below 200.
/// NOTE(review): going by the name, the granularity (in bytes) at which
/// shared memory is allocated -- confirm the unit against the users.
///
/// `arch` is parenthesized so that compound argument expressions are
/// compared as a whole.
#define CUB_SMEM_ALLOC_UNIT(arch)                       \
    (((arch) >= 300) ?                                  \
        (256) :                                         \
        (((arch) >= 200) ?                              \
            (128) :                                     \
            (512)))
105 
/// Boolean selected by `arch`: false when arch >= 200, true below 200.
/// NOTE(review): going by the name, whether registers are allocated per
/// thread block (true) rather than at a finer granularity (false) -- confirm.
///
/// `arch` is parenthesized so that compound argument expressions are
/// compared as a whole.
#define CUB_REGS_BY_BLOCK(arch)                         \
    (((arch) >= 200) ?                                  \
        (false) :                                       \
        (true))
111 
/// Value selected by `arch`: 256 when arch >= 300, 64 when
/// 200 <= arch < 300, 512 when 120 <= arch < 200, and 256 below 120.
/// NOTE(review): going by the name, the granularity at which registers are
/// allocated -- confirm the unit against the users.
///
/// `arch` is parenthesized so that compound argument expressions are
/// compared as a whole.
#define CUB_REG_ALLOC_UNIT(arch)                        \
    (((arch) >= 300) ?                                  \
        (256) :                                         \
        (((arch) >= 200) ?                              \
            (64) :                                      \
            (((arch) >= 120) ?                          \
                (512) :                                 \
                (256))))
121 
/// Value selected by `arch`: 4 when arch >= 300, otherwise 2.
/// NOTE(review): going by the name, the granularity (in warps) at which
/// warps are allocated -- confirm against the users.
///
/// `arch` is parenthesized so that compound argument expressions are
/// compared as a whole.
#define CUB_WARP_ALLOC_UNIT(arch)                       \
    (((arch) >= 300) ?                                  \
        (4) :                                           \
        (2))
127 
/// Thread-count limit selected by `arch`: 2048 when arch >= 300, 1536 when
/// 200 <= arch < 300, 1024 when 120 <= arch < 200, and 768 below 120.
/// Going by the name, the maximum number of resident threads per SM.
///
/// `arch` is parenthesized so that compound argument expressions are
/// compared as a whole.
#define CUB_MAX_SM_THREADS(arch)                        \
    (((arch) >= 300) ?                                  \
        (2048) :                                        \
        (((arch) >= 200) ?                              \
            (1536) :                                    \
            (((arch) >= 120) ?                          \
                (1024) :                                \
                (768))))
137 
/// Block-count limit selected by `arch`: 16 when arch >= 300, otherwise 8.
/// Going by the name, the maximum number of resident thread blocks per SM.
///
/// `arch` is parenthesized so that compound argument expressions are
/// compared as a whole.
#define CUB_MAX_SM_BLOCKS(arch)                         \
    (((arch) >= 300) ?                                  \
        (16) :                                          \
        (8))
143 
/// Thread-count limit selected by `arch`: 1024 when arch >= 200,
/// otherwise 512.  Going by the name, the maximum number of threads in one
/// thread block.
///
/// `arch` is parenthesized so that compound argument expressions are
/// compared as a whole.
#define CUB_MAX_BLOCK_THREADS(arch)                     \
    (((arch) >= 200) ?                                  \
        (1024) :                                        \
        (512))
149 
/// Register-count limit selected by `arch`: 64 * 1024 when arch >= 300,
/// 32 * 1024 when 200 <= arch < 300, 16 * 1024 when 120 <= arch < 200, and
/// 8 * 1024 below 120.  Going by the name, the number of registers per SM.
///
/// `arch` is parenthesized so that compound argument expressions are
/// compared as a whole.
#define CUB_MAX_SM_REGISTERS(arch)                      \
    (((arch) >= 300) ?                                  \
        (64 * 1024) :                                   \
        (((arch) >= 200) ?                              \
            (32 * 1024) :                               \
            (((arch) >= 120) ?                          \
                (16 * 1024) :                           \
                (8 * 1024))))
159 
/// Factor selected by `arch`: 5 when arch >= 300, 3 when
/// 200 <= arch < 300, and 10 below 200.
/// NOTE(review): presumably an oversubscription multiplier used when sizing
/// grids -- this file does not show how it is consumed; confirm at the users.
///
/// `arch` is parenthesized so that compound argument expressions are
/// compared as a whole.
#define CUB_SUBSCRIPTION_FACTOR(arch)                   \
    (((arch) >= 300) ?                                  \
        (5) :                                           \
        (((arch) >= 200) ?                              \
            (3) :                                       \
            (10)))
167 
/// Threshold selected by `arch`: 1 when arch >= 300, otherwise 4.
/// NOTE(review): going by the name, the degree of shared-memory bank
/// conflict tolerated before padding is preferred -- the exact comparison is
/// made by the users of this macro; confirm there.
///
/// `arch` is parenthesized so that compound argument expressions are
/// compared as a whole.
#define CUB_PREFER_CONFLICT_OVER_PADDING(arch)          \
    (((arch) >= 300) ?                                  \
        (1) :                                           \
        (4))
173 
#ifndef DOXYGEN_SHOULD_SKIP_THIS    // Do not document

// Shorthand forms of the arch-indexed macros, each evaluated at CUB_PTX_ARCH
// (the architecture of the current compilation pass).  Every arch-indexed
// macro in this file has a CUB_PTX_* counterpart.
#define CUB_PTX_LOG_WARP_THREADS                CUB_LOG_WARP_THREADS(CUB_PTX_ARCH)
#define CUB_PTX_WARP_THREADS                    CUB_WARP_THREADS(CUB_PTX_ARCH)
#define CUB_PTX_LOG_SMEM_BANKS                  CUB_LOG_SMEM_BANKS(CUB_PTX_ARCH)
#define CUB_PTX_SMEM_BANKS                      CUB_SMEM_BANKS(CUB_PTX_ARCH)
#define CUB_PTX_SMEM_BANK_BYTES                 CUB_SMEM_BANK_BYTES(CUB_PTX_ARCH)
#define CUB_PTX_SMEM_BYTES                      CUB_SMEM_BYTES(CUB_PTX_ARCH)
#define CUB_PTX_SMEM_ALLOC_UNIT                 CUB_SMEM_ALLOC_UNIT(CUB_PTX_ARCH)
#define CUB_PTX_REGS_BY_BLOCK                   CUB_REGS_BY_BLOCK(CUB_PTX_ARCH)
#define CUB_PTX_REG_ALLOC_UNIT                  CUB_REG_ALLOC_UNIT(CUB_PTX_ARCH)
#define CUB_PTX_WARP_ALLOC_UNIT                 CUB_WARP_ALLOC_UNIT(CUB_PTX_ARCH)
#define CUB_PTX_MAX_SM_THREADS                  CUB_MAX_SM_THREADS(CUB_PTX_ARCH)
#define CUB_PTX_MAX_SM_BLOCKS                   CUB_MAX_SM_BLOCKS(CUB_PTX_ARCH)
#define CUB_PTX_MAX_BLOCK_THREADS               CUB_MAX_BLOCK_THREADS(CUB_PTX_ARCH)
#define CUB_PTX_MAX_SM_REGISTERS                CUB_MAX_SM_REGISTERS(CUB_PTX_ARCH)
#define CUB_PTX_SUBSCRIPTION_FACTOR             CUB_SUBSCRIPTION_FACTOR(CUB_PTX_ARCH)
#define CUB_PTX_PREFER_CONFLICT_OVER_PADDING    CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_ARCH)

#endif  // Do not document
193 
194  // end group UtilMgmt
196 
197 } // CUB namespace
198 CUB_NS_POSTFIX // Optional outer namespace(s)