CUB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
device_histogram.cuh
Go to the documentation of this file.
1 
2 /******************************************************************************
3  * Copyright (c) 2011, Duane Merrill. All rights reserved.
4  * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
35 #pragma once
36 
37 #include <stdio.h>
38 #include <iterator>
39 #include <limits>
40 
41 #include "dispatch/dispatch_histogram.cuh"
42 #include "../util_namespace.cuh"
43 
45 CUB_NS_PREFIX
46 
48 namespace cub {
49 
50 
64 {
65  /******************************************************************/
69 
119  template <
120  typename SampleIteratorT,
121  typename CounterT,
122  typename LevelT,
123  typename OffsetT>
124  CUB_RUNTIME_FUNCTION
125  static cudaError_t HistogramEven(
126  void* d_temp_storage,
127  size_t& temp_storage_bytes,
128  SampleIteratorT d_samples,
129  CounterT* d_histogram,
130  int num_levels,
131  LevelT lower_level,
132  LevelT upper_level,
133  OffsetT num_samples,
134  cudaStream_t stream = 0,
135  bool debug_synchronous = false)
136  {
138  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
139 
140  CounterT* d_histogram1[1] = {d_histogram};
141  int num_levels1[1] = {num_levels};
142  LevelT lower_level1[1] = {lower_level};
143  LevelT upper_level1[1] = {upper_level};
144 
145  return MultiHistogramEven<1, 1>(
146  d_temp_storage,
147  temp_storage_bytes,
148  d_samples,
149  d_histogram1,
150  num_levels1,
151  lower_level1,
152  upper_level1,
153  num_samples,
154  1,
155  sizeof(SampleT) * num_samples,
156  stream,
157  debug_synchronous);
158  }
159 
160 
219  template <
220  typename SampleIteratorT,
221  typename CounterT,
222  typename LevelT,
223  typename OffsetT>
224  CUB_RUNTIME_FUNCTION
225  static cudaError_t HistogramEven(
226  void* d_temp_storage,
227  size_t& temp_storage_bytes,
228  SampleIteratorT d_samples,
229  CounterT* d_histogram,
230  int num_levels,
231  LevelT lower_level,
232  LevelT upper_level,
233  OffsetT num_row_samples,
234  OffsetT num_rows,
235  size_t row_stride_bytes,
236  cudaStream_t stream = 0,
237  bool debug_synchronous = false)
238  {
239  CounterT* d_histogram1[1] = {d_histogram};
240  int num_levels1[1] = {num_levels};
241  LevelT lower_level1[1] = {lower_level};
242  LevelT upper_level1[1] = {upper_level};
243 
244  return MultiHistogramEven<1, 1>(
245  d_temp_storage,
246  temp_storage_bytes,
247  d_samples,
248  d_histogram1,
249  num_levels1,
250  lower_level1,
251  upper_level1,
252  num_row_samples,
253  num_rows,
254  row_stride_bytes,
255  stream,
256  debug_synchronous);
257  }
258 
320  template <
321  int NUM_CHANNELS,
322  int NUM_ACTIVE_CHANNELS,
323  typename SampleIteratorT,
324  typename CounterT,
325  typename LevelT,
326  typename OffsetT>
327  CUB_RUNTIME_FUNCTION
328  static cudaError_t MultiHistogramEven(
329  void* d_temp_storage,
330  size_t& temp_storage_bytes,
331  SampleIteratorT d_samples,
332  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
333  int num_levels[NUM_ACTIVE_CHANNELS],
334  LevelT lower_level[NUM_ACTIVE_CHANNELS],
335  LevelT upper_level[NUM_ACTIVE_CHANNELS],
336  OffsetT num_pixels,
337  cudaStream_t stream = 0,
338  bool debug_synchronous = false)
339  {
341  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
342 
343  return MultiHistogramEven<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
344  d_temp_storage,
345  temp_storage_bytes,
346  d_samples,
347  d_histogram,
348  num_levels,
349  lower_level,
350  upper_level,
351  num_pixels,
352  1,
353  sizeof(SampleT) * NUM_CHANNELS * num_pixels,
354  stream,
355  debug_synchronous);
356  }
357 
358 
428  template <
429  int NUM_CHANNELS,
430  int NUM_ACTIVE_CHANNELS,
431  typename SampleIteratorT,
432  typename CounterT,
433  typename LevelT,
434  typename OffsetT>
435  CUB_RUNTIME_FUNCTION
436  static cudaError_t MultiHistogramEven(
437  void* d_temp_storage,
438  size_t& temp_storage_bytes,
439  SampleIteratorT d_samples,
440  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
441  int num_levels[NUM_ACTIVE_CHANNELS],
442  LevelT lower_level[NUM_ACTIVE_CHANNELS],
443  LevelT upper_level[NUM_ACTIVE_CHANNELS],
444  OffsetT num_row_pixels,
445  OffsetT num_rows,
446  size_t row_stride_bytes,
447  cudaStream_t stream = 0,
448  bool debug_synchronous = false)
449  {
451  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
452  Int2Type<sizeof(SampleT) == 1> is_byte_sample;
453 
454  if ((sizeof(OffsetT) > sizeof(int)) && (row_stride_bytes * num_rows < std::numeric_limits<int>::max()))
455  {
456  // Down-convert OffsetT data type
457 
458 
459  return DipatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int>::DispatchEven(
460  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,
461  (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)),
462  stream, debug_synchronous, is_byte_sample);
463  }
464 
465  return DipatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT>::DispatchEven(
466  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level,
467  num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)),
468  stream, debug_synchronous, is_byte_sample);
469  }
470 
471 
473  /******************************************************************/
477 
526  template <
527  typename SampleIteratorT,
528  typename CounterT,
529  typename LevelT,
530  typename OffsetT>
531  CUB_RUNTIME_FUNCTION
532  static cudaError_t HistogramRange(
533  void* d_temp_storage,
534  size_t& temp_storage_bytes,
535  SampleIteratorT d_samples,
536  CounterT* d_histogram,
537  int num_levels,
538  LevelT* d_levels,
539  OffsetT num_samples,
540  cudaStream_t stream = 0,
541  bool debug_synchronous = false)
542  {
544  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
545 
546  CounterT* d_histogram1[1] = {d_histogram};
547  int num_levels1[1] = {num_levels};
548  LevelT* d_levels1[1] = {d_levels};
549 
550  return MultiHistogramRange<1, 1>(
551  d_temp_storage,
552  temp_storage_bytes,
553  d_samples,
554  d_histogram1,
555  num_levels1,
556  d_levels1,
557  num_samples,
558  1,
559  sizeof(SampleT) * num_samples,
560  stream,
561  debug_synchronous);
562  }
563 
564 
622  template <
623  typename SampleIteratorT,
624  typename CounterT,
625  typename LevelT,
626  typename OffsetT>
627  CUB_RUNTIME_FUNCTION
628  static cudaError_t HistogramRange(
629  void* d_temp_storage,
630  size_t& temp_storage_bytes,
631  SampleIteratorT d_samples,
632  CounterT* d_histogram,
633  int num_levels,
634  LevelT* d_levels,
635  OffsetT num_row_samples,
636  OffsetT num_rows,
637  size_t row_stride_bytes,
638  cudaStream_t stream = 0,
639  bool debug_synchronous = false)
640  {
641  CounterT* d_histogram1[1] = {d_histogram};
642  int num_levels1[1] = {num_levels};
643  LevelT* d_levels1[1] = {d_levels};
644 
645  return MultiHistogramRange<1, 1>(
646  d_temp_storage,
647  temp_storage_bytes,
648  d_samples,
649  d_histogram1,
650  num_levels1,
651  d_levels1,
652  num_row_samples,
653  num_rows,
654  row_stride_bytes,
655  stream,
656  debug_synchronous);
657  }
658 
720  template <
721  int NUM_CHANNELS,
722  int NUM_ACTIVE_CHANNELS,
723  typename SampleIteratorT,
724  typename CounterT,
725  typename LevelT,
726  typename OffsetT>
727  CUB_RUNTIME_FUNCTION
728  static cudaError_t MultiHistogramRange(
729  void* d_temp_storage,
730  size_t& temp_storage_bytes,
731  SampleIteratorT d_samples,
732  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
733  int num_levels[NUM_ACTIVE_CHANNELS],
734  LevelT* d_levels[NUM_ACTIVE_CHANNELS],
735  OffsetT num_pixels,
736  cudaStream_t stream = 0,
737  bool debug_synchronous = false)
738  {
740  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
741 
742  return MultiHistogramRange<NUM_CHANNELS, NUM_ACTIVE_CHANNELS>(
743  d_temp_storage,
744  temp_storage_bytes,
745  d_samples,
746  d_histogram,
747  num_levels,
748  d_levels,
749  num_pixels,
750  1,
751  sizeof(SampleT) * NUM_CHANNELS * num_pixels,
752  stream,
753  debug_synchronous);
754  }
755 
756 
824  template <
825  int NUM_CHANNELS,
826  int NUM_ACTIVE_CHANNELS,
827  typename SampleIteratorT,
828  typename CounterT,
829  typename LevelT,
830  typename OffsetT>
831  CUB_RUNTIME_FUNCTION
832  static cudaError_t MultiHistogramRange(
833  void* d_temp_storage,
834  size_t& temp_storage_bytes,
835  SampleIteratorT d_samples,
836  CounterT* d_histogram[NUM_ACTIVE_CHANNELS],
837  int num_levels[NUM_ACTIVE_CHANNELS],
838  LevelT* d_levels[NUM_ACTIVE_CHANNELS],
839  OffsetT num_row_pixels,
840  OffsetT num_rows,
841  size_t row_stride_bytes,
842  cudaStream_t stream = 0,
843  bool debug_synchronous = false)
844  {
846  typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
847  Int2Type<sizeof(SampleT) == 1> is_byte_sample;
848 
849  if ((sizeof(OffsetT) > sizeof(int)) && (row_stride_bytes * num_rows < std::numeric_limits<int>::max()))
850  {
851  // Down-convert OffsetT data type
852  return DipatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int>::DispatchRange(
853  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,
854  (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)),
855  stream, debug_synchronous, is_byte_sample);
856  }
857 
858  return DipatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT>::DispatchRange(
859  d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,
860  num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)),
861  stream, debug_synchronous, is_byte_sample);
862  }
863 
864 
865 
867 };
868 
873 } // CUB namespace
874 CUB_NS_POSTFIX // Optional outer namespace(s)
875 
876