CUB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
device_reduce.cuh
Go to the documentation of this file.
1 
2 /******************************************************************************
3  * Copyright (c) 2011, Duane Merrill. All rights reserved.
4  * Copyright (c) 2011-2015, NVIDIA CORPORATION. All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions are met:
8  * * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  * * Redistributions in binary form must reproduce the above copyright
11  * notice, this list of conditions and the following disclaimer in the
12  * documentation and/or other materials provided with the distribution.
13  * * Neither the name of the NVIDIA CORPORATION nor the
14  * names of its contributors may be used to endorse or promote products
15  * derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20  * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  ******************************************************************************/
29 
35 #pragma once
36 
37 #include <stdio.h>
38 #include <iterator>
39 
40 #include "dispatch/dispatch_reduce.cuh"
41 #include "dispatch/dispatch_reduce_by_key.cuh"
42 #include "../util_namespace.cuh"
43 
45 CUB_NS_PREFIX
46 
48 namespace cub {
49 
50 
83 {
/**
 * Generic reduction entry point.
 *
 * This is a thin forwarder: it fixes the global offset type to `int`
 * (the same type as num_items) and hands every argument, unchanged, to
 * DispatchReduce<...>::Dispatch, returning that call's status.
 *
 * @tparam InputIteratorT   Type of the input iterator d_in.
 * @tparam OutputIteratorT  Type of the output iterator d_out.
 * @tparam ReductionOp      Type of the functor passed as reduction_op.
 *
 * @param d_temp_storage      Scratch buffer pointer, forwarded as-is.
 * @param temp_storage_bytes  Scratch buffer size; taken by non-const reference
 *                            and forwarded, so the dispatch layer may modify it.
 *                            NOTE(review): presumably written with the required
 *                            size when d_temp_storage is NULL -- confirm in
 *                            dispatch_reduce.cuh.
 * @param d_in                Input sequence iterator.
 * @param d_out               Output iterator for the result.
 * @param num_items           Number of input items (signed 32-bit).
 * @param reduction_op        Reduction functor, forwarded by value.
 * @param stream              Stream forwarded to the dispatch layer (default 0).
 * @param debug_synchronous   Debug flag forwarded to the dispatch layer (default false).
 *
 * @return The cudaError_t produced by DispatchReduce<...>::Dispatch.
 */
template <typename InputIteratorT, typename OutputIteratorT, typename ReductionOp>
CUB_RUNTIME_FUNCTION
static cudaError_t Reduce(
    void            *d_temp_storage,
    size_t          &temp_storage_bytes,
    InputIteratorT  d_in,
    OutputIteratorT d_out,
    int             num_items,
    ReductionOp     reduction_op,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Global offsets are signed 32-bit integers
    typedef int OffsetT;

    // Concrete dispatcher for this iterator / operator combination
    typedef DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, ReductionOp> DispatchT;

    return DispatchT::Dispatch(
        d_temp_storage, temp_storage_bytes,
        d_in, d_out, num_items,
        reduction_op,
        stream, debug_synchronous);
}
168 
169 
/**
 * Sum reduction entry point.
 *
 * Forwards to DispatchReduce<...>::Dispatch with a default-constructed
 * cub::Sum functor as the reduction operator and `int` as the global
 * offset type. The dispatch status is returned unchanged.
 *
 * @tparam InputIteratorT   Type of the input iterator d_in.
 * @tparam OutputIteratorT  Type of the output iterator d_out.
 *
 * @param d_temp_storage      Scratch buffer pointer, forwarded as-is.
 * @param temp_storage_bytes  Scratch buffer size; taken by non-const reference
 *                            and forwarded, so the dispatch layer may modify it.
 * @param d_in                Input sequence iterator.
 * @param d_out               Output iterator for the result.
 * @param num_items           Number of input items (signed 32-bit).
 * @param stream              Stream forwarded to the dispatch layer (default 0).
 * @param debug_synchronous   Debug flag forwarded to the dispatch layer (default false).
 *
 * @return The cudaError_t produced by DispatchReduce<...>::Dispatch.
 */
template <typename InputIteratorT, typename OutputIteratorT>
CUB_RUNTIME_FUNCTION
static cudaError_t Sum(
    void            *d_temp_storage,
    size_t          &temp_storage_bytes,
    InputIteratorT  d_in,
    OutputIteratorT d_out,
    int             num_items,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Global offsets are signed 32-bit integers
    typedef int OffsetT;

    // Reduction functor; must stay namespace-qualified because the unqualified
    // name `Sum` refers to this function inside its own body
    typedef cub::Sum SumOpT;

    // Concrete dispatcher for this iterator / operator combination
    typedef DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, SumOpT> DispatchT;

    return DispatchT::Dispatch(
        d_temp_storage, temp_storage_bytes,
        d_in, d_out, num_items,
        SumOpT(),
        stream, debug_synchronous);
}
244 
245 
/**
 * Minimum reduction entry point.
 *
 * Forwards to DispatchReduce<...>::Dispatch with a default-constructed
 * cub::Min functor as the reduction operator and `int` as the global
 * offset type. The dispatch status is returned unchanged.
 *
 * @tparam InputIteratorT   Type of the input iterator d_in.
 * @tparam OutputIteratorT  Type of the output iterator d_out.
 *
 * @param d_temp_storage      Scratch buffer pointer, forwarded as-is.
 * @param temp_storage_bytes  Scratch buffer size; taken by non-const reference
 *                            and forwarded, so the dispatch layer may modify it.
 * @param d_in                Input sequence iterator.
 * @param d_out               Output iterator for the result.
 * @param num_items           Number of input items (signed 32-bit).
 * @param stream              Stream forwarded to the dispatch layer (default 0).
 * @param debug_synchronous   Debug flag forwarded to the dispatch layer (default false).
 *
 * @return The cudaError_t produced by DispatchReduce<...>::Dispatch.
 */
template <typename InputIteratorT, typename OutputIteratorT>
CUB_RUNTIME_FUNCTION
static cudaError_t Min(
    void            *d_temp_storage,
    size_t          &temp_storage_bytes,
    InputIteratorT  d_in,
    OutputIteratorT d_out,
    int             num_items,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Global offsets are signed 32-bit integers
    typedef int OffsetT;

    // Reduction functor; must stay namespace-qualified because the unqualified
    // name `Min` refers to this function inside its own body
    typedef cub::Min MinOpT;

    // Concrete dispatcher for this iterator / operator combination
    typedef DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, MinOpT> DispatchT;

    return DispatchT::Dispatch(
        d_temp_storage, temp_storage_bytes,
        d_in, d_out, num_items,
        MinOpT(),
        stream, debug_synchronous);
}
316 
317 
/**
 * Arg-min reduction entry point.
 *
 * Wraps d_in in an ArgIndexInputIterator (constructed with a starting
 * offset of 0) and reduces the wrapped sequence with a default-constructed
 * cub::ArgMin functor through DispatchReduce<...>::Dispatch. Global offsets
 * are `int`. The dispatch status is returned unchanged.
 *
 * @tparam InputIteratorT   Type of the raw input iterator d_in.
 * @tparam OutputIteratorT  Type of the output iterator d_out.
 *                          NOTE(review): presumably receives an (index, value)
 *                          pair -- confirm against ArgIndexInputIterator.
 *
 * @param d_temp_storage      Scratch buffer pointer, forwarded as-is.
 * @param temp_storage_bytes  Scratch buffer size; taken by non-const reference
 *                            and forwarded, so the dispatch layer may modify it.
 * @param d_in                Input sequence iterator (wrapped before dispatch).
 * @param d_out               Output iterator for the result.
 * @param num_items           Number of input items (signed 32-bit).
 * @param stream              Stream forwarded to the dispatch layer (default 0).
 * @param debug_synchronous   Debug flag forwarded to the dispatch layer (default false).
 *
 * @return The cudaError_t produced by DispatchReduce<...>::Dispatch.
 */
template <typename InputIteratorT, typename OutputIteratorT>
CUB_RUNTIME_FUNCTION
static cudaError_t ArgMin(
    void            *d_temp_storage,
    size_t          &temp_storage_bytes,
    InputIteratorT  d_in,
    OutputIteratorT d_out,
    int             num_items,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Global offsets are signed 32-bit integers
    typedef int OffsetT;

    // Reduction functor; must stay namespace-qualified because the unqualified
    // name `ArgMin` refers to this function inside its own body
    typedef cub::ArgMin ArgMinOpT;

    // Iterator adaptor over the raw input, parameterized on the offset type
    typedef ArgIndexInputIterator<InputIteratorT, OffsetT> IndexedInputT;

    // Concrete dispatcher operating on the wrapped input
    typedef DispatchReduce<IndexedInputT, OutputIteratorT, OffsetT, ArgMinOpT> DispatchT;

    // Wrap the caller's input, starting at offset 0
    IndexedInputT d_indexed_in(d_in, 0);

    return DispatchT::Dispatch(
        d_temp_storage, temp_storage_bytes,
        d_indexed_in, d_out, num_items,
        ArgMinOpT(),
        stream, debug_synchronous);
}
397 
398 
/**
 * Maximum reduction entry point.
 *
 * Forwards to DispatchReduce<...>::Dispatch with a default-constructed
 * cub::Max functor as the reduction operator and `int` as the global
 * offset type. The dispatch status is returned unchanged.
 *
 * @tparam InputIteratorT   Type of the input iterator d_in.
 * @tparam OutputIteratorT  Type of the output iterator d_out.
 *
 * @param d_temp_storage      Scratch buffer pointer, forwarded as-is.
 * @param temp_storage_bytes  Scratch buffer size; taken by non-const reference
 *                            and forwarded, so the dispatch layer may modify it.
 * @param d_in                Input sequence iterator.
 * @param d_out               Output iterator for the result.
 * @param num_items           Number of input items (signed 32-bit).
 * @param stream              Stream forwarded to the dispatch layer (default 0).
 * @param debug_synchronous   Debug flag forwarded to the dispatch layer (default false).
 *
 * @return The cudaError_t produced by DispatchReduce<...>::Dispatch.
 */
template <typename InputIteratorT, typename OutputIteratorT>
CUB_RUNTIME_FUNCTION
static cudaError_t Max(
    void            *d_temp_storage,
    size_t          &temp_storage_bytes,
    InputIteratorT  d_in,
    OutputIteratorT d_out,
    int             num_items,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Global offsets are signed 32-bit integers
    typedef int OffsetT;

    // Reduction functor; must stay namespace-qualified because the unqualified
    // name `Max` refers to this function inside its own body
    typedef cub::Max MaxOpT;

    // Concrete dispatcher for this iterator / operator combination
    typedef DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, MaxOpT> DispatchT;

    return DispatchT::Dispatch(
        d_temp_storage, temp_storage_bytes,
        d_in, d_out, num_items,
        MaxOpT(),
        stream, debug_synchronous);
}
469 
470 
/**
 * Arg-max reduction entry point.
 *
 * Wraps d_in in an ArgIndexInputIterator (constructed with a starting
 * offset of 0) and reduces the wrapped sequence with a default-constructed
 * cub::ArgMax functor through DispatchReduce<...>::Dispatch. Global offsets
 * are `int`. The dispatch status is returned unchanged.
 *
 * @tparam InputIteratorT   Type of the raw input iterator d_in.
 * @tparam OutputIteratorT  Type of the output iterator d_out.
 *                          NOTE(review): presumably receives an (index, value)
 *                          pair -- confirm against ArgIndexInputIterator.
 *
 * @param d_temp_storage      Scratch buffer pointer, forwarded as-is.
 * @param temp_storage_bytes  Scratch buffer size; taken by non-const reference
 *                            and forwarded, so the dispatch layer may modify it.
 * @param d_in                Input sequence iterator (wrapped before dispatch).
 * @param d_out               Output iterator for the result.
 * @param num_items           Number of input items (signed 32-bit).
 * @param stream              Stream forwarded to the dispatch layer (default 0).
 * @param debug_synchronous   Debug flag forwarded to the dispatch layer (default false).
 *
 * @return The cudaError_t produced by DispatchReduce<...>::Dispatch.
 */
template <typename InputIteratorT, typename OutputIteratorT>
CUB_RUNTIME_FUNCTION
static cudaError_t ArgMax(
    void            *d_temp_storage,
    size_t          &temp_storage_bytes,
    InputIteratorT  d_in,
    OutputIteratorT d_out,
    int             num_items,
    cudaStream_t    stream            = 0,
    bool            debug_synchronous = false)
{
    // Global offsets are signed 32-bit integers
    typedef int OffsetT;

    // Reduction functor; must stay namespace-qualified because the unqualified
    // name `ArgMax` refers to this function inside its own body
    typedef cub::ArgMax ArgMaxOpT;

    // Iterator adaptor over the raw input, parameterized on the offset type
    typedef ArgIndexInputIterator<InputIteratorT, OffsetT> IndexedInputT;

    // Concrete dispatcher operating on the wrapped input
    typedef DispatchReduce<IndexedInputT, OutputIteratorT, OffsetT, ArgMaxOpT> DispatchT;

    // Wrap the caller's input, starting at offset 0
    IndexedInputT d_indexed_in(d_in, 0);

    return DispatchT::Dispatch(
        d_temp_storage, temp_storage_bytes,
        d_indexed_in, d_out, num_items,
        ArgMaxOpT(),
        stream, debug_synchronous);
}
550 
551 
/**
 * Reduce-by-key entry point.
 *
 * Forwards to DispatchReduceByKey<...>::Dispatch using a default-constructed
 * Equality functor as the key comparison, the caller's reduction_op as the
 * value reduction, and `int` as the global offset type. The dispatch status
 * is returned unchanged.
 *
 * Note the parameter order: reduction_op precedes num_items in this
 * signature, and both are forwarded in that same order after the
 * equality functor.
 *
 * @tparam KeysInputIteratorT         Type of d_keys_in.
 * @tparam UniqueOutputIteratorT      Type of d_unique_out.
 * @tparam ValuesInputIteratorT       Type of d_values_in.
 * @tparam AggregatesOutputIteratorT  Type of d_aggregates_out.
 * @tparam NumRunsOutputIteratorT     Type of d_num_runs_out.
 * @tparam ReductionOp                Type of the functor passed as reduction_op.
 *
 * @param d_temp_storage      Scratch buffer pointer, forwarded as-is.
 * @param temp_storage_bytes  Scratch buffer size; taken by non-const reference
 *                            and forwarded, so the dispatch layer may modify it.
 * @param d_keys_in           Input keys iterator.
 * @param d_unique_out        Output iterator for unique keys.
 * @param d_values_in         Input values iterator.
 * @param d_aggregates_out    Output iterator for per-key aggregates.
 * @param d_num_runs_out      Output iterator for the number of runs.
 * @param reduction_op        Value reduction functor, forwarded by value.
 * @param num_items           Number of input items (signed 32-bit).
 * @param stream              Stream forwarded to the dispatch layer (default 0).
 * @param debug_synchronous   Debug flag forwarded to the dispatch layer (default false).
 *
 * @return The cudaError_t produced by DispatchReduceByKey<...>::Dispatch.
 */
template <
    typename KeysInputIteratorT,
    typename UniqueOutputIteratorT,
    typename ValuesInputIteratorT,
    typename AggregatesOutputIteratorT,
    typename NumRunsOutputIteratorT,
    typename ReductionOp>
CUB_RUNTIME_FUNCTION __forceinline__
static cudaError_t ReduceByKey(
    void                      *d_temp_storage,
    size_t                    &temp_storage_bytes,
    KeysInputIteratorT        d_keys_in,
    UniqueOutputIteratorT     d_unique_out,
    ValuesInputIteratorT      d_values_in,
    AggregatesOutputIteratorT d_aggregates_out,
    NumRunsOutputIteratorT    d_num_runs_out,
    ReductionOp               reduction_op,
    int                       num_items,
    cudaStream_t              stream            = 0,
    bool                      debug_synchronous = false)
{
    // Global offsets are signed 32-bit integers
    typedef int OffsetT;

    // Keys are compared with the default == functor
    typedef Equality EqualityOp;

    // Concrete dispatcher for this iterator / operator combination.
    // (The former FlagIterator / SelectOp typedefs were never referenced and
    // have been dropped to avoid unused-local-typedef warnings.)
    typedef DispatchReduceByKey<
        KeysInputIteratorT,
        UniqueOutputIteratorT,
        ValuesInputIteratorT,
        AggregatesOutputIteratorT,
        NumRunsOutputIteratorT,
        EqualityOp,
        ReductionOp,
        OffsetT> DispatchT;

    return DispatchT::Dispatch(
        d_temp_storage,
        temp_storage_bytes,
        d_keys_in,
        d_unique_out,
        d_values_in,
        d_aggregates_out,
        d_num_runs_out,
        EqualityOp(),
        reduction_op,
        num_items,
        stream,
        debug_synchronous);
}
674 
675 };
676 
681 } // CUB namespace
682 CUB_NS_POSTFIX // Optional outer namespace(s)
683 
684