40 #include "dispatch/dispatch_reduce.cuh"
41 #include "dispatch/dispatch_reduce_by_key.cuh"
42 #include "../util_namespace.cuh"
138 typename InputIteratorT,
139 typename OutputIteratorT,
140 typename ReductionOp>
143 void* d_temp_storage,
144 size_t &temp_storage_bytes,
146 OutputIteratorT d_out,
148 ReductionOp reduction_op,
149 cudaStream_t stream = 0,
150 bool debug_synchronous =
false)
156 typedef DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, ReductionOp> DispatchReduce;
158 return DispatchReduce::Dispatch(
216 typename InputIteratorT,
217 typename OutputIteratorT>
220 void* d_temp_storage,
221 size_t &temp_storage_bytes,
223 OutputIteratorT d_out,
225 cudaStream_t stream = 0,
226 bool debug_synchronous =
false)
232 typedef DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Sum> DispatchReduce;
234 return DispatchReduce::Dispatch(
288 typename InputIteratorT,
289 typename OutputIteratorT>
292 void* d_temp_storage,
293 size_t &temp_storage_bytes,
295 OutputIteratorT d_out,
297 cudaStream_t stream = 0,
298 bool debug_synchronous =
false)
304 typedef DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Min> DispatchReduce;
306 return DispatchReduce::Dispatch(
365 typename InputIteratorT,
366 typename OutputIteratorT>
369 void* d_temp_storage,
370 size_t &temp_storage_bytes,
372 OutputIteratorT d_out,
374 cudaStream_t stream = 0,
375 bool debug_synchronous =
false)
382 ArgIndexInputIteratorT d_argmin_in(d_in, 0);
385 typedef DispatchReduce<ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMin> DispatchReduce;
387 return DispatchReduce::Dispatch(
441 typename InputIteratorT,
442 typename OutputIteratorT>
445 void* d_temp_storage,
446 size_t &temp_storage_bytes,
448 OutputIteratorT d_out,
450 cudaStream_t stream = 0,
451 bool debug_synchronous =
false)
457 typedef DispatchReduce<InputIteratorT, OutputIteratorT, OffsetT, cub::Max> DispatchReduce;
459 return DispatchReduce::Dispatch(
518 typename InputIteratorT,
519 typename OutputIteratorT>
522 void* d_temp_storage,
523 size_t &temp_storage_bytes,
525 OutputIteratorT d_out,
527 cudaStream_t stream = 0,
528 bool debug_synchronous =
false)
535 ArgIndexInputIteratorT d_argmax_in(d_in, 0);
538 typedef DispatchReduce<ArgIndexInputIteratorT, OutputIteratorT, OffsetT, cub::ArgMax> DispatchReduce;
540 return DispatchReduce::Dispatch(
635 typename KeysInputIteratorT,
636 typename UniqueOutputIteratorT,
637 typename ValuesInputIteratorT,
638 typename AggregatesOutputIteratorT,
639 typename NumRunsOutputIteratorT,
640 typename ReductionOp>
641 CUB_RUNTIME_FUNCTION __forceinline__
643 void* d_temp_storage,
644 size_t &temp_storage_bytes,
645 KeysInputIteratorT d_keys_in,
646 UniqueOutputIteratorT d_unique_out,
647 ValuesInputIteratorT d_values_in,
648 AggregatesOutputIteratorT d_aggregates_out,
649 NumRunsOutputIteratorT d_num_runs_out,
650 ReductionOp reduction_op,
652 cudaStream_t stream = 0,
653 bool debug_synchronous =
false)
660 return DispatchReduceByKey<KeysInputIteratorT, UniqueOutputIteratorT, ValuesInputIteratorT, AggregatesOutputIteratorT, NumRunsOutputIteratorT, EqualityOp, ReductionOp, OffsetT>::Dispatch(