diff --git a/VERSION.TXT b/VERSION.TXT index 031c0f5ac3..a5a13377dd 100644 --- a/VERSION.TXT +++ b/VERSION.TXT @@ -1 +1,17 @@ -CUB version 0.900 + +0.9.1 03/09/2013 + + - Fix for ambiguity in BlockScan::Reduce() between generic reduction and + summation. Summation entrypoints are now called ::Sum(), similar + to the convention in BlockScan. + + - Small edits to mainpage documentation and download tracking + +//----------------------------------------------------------------------------- + +0.9.0 03/07/2013 + + - Intial "preview" release. CUB is the first durable, high-performance library + of cooperative block-level, warp-level, and thread-level primitives for CUDA + kernel programming. More primitives and examples coming soon! + \ No newline at end of file diff --git a/cub/block/block_reduce.cuh b/cub/block/block_reduce.cuh index 4a4b0f493f..b158bceb18 100644 --- a/cub/block/block_reduce.cuh +++ b/cub/block/block_reduce.cuh @@ -112,7 +112,7 @@ namespace cub { * ... * * // Compute the threadblock-wide sum for thread0 - * int aggregate = BlockReduce::Reduce(smem_storage, data); + * int aggregate = BlockReduce::Sum(smem_storage, data); * * ... * \endcode @@ -137,7 +137,7 @@ namespace cub { * if (threadIdx.x < num_elements) data = ...; * * // Compute the threadblock-wide sum of valid elements in thread0 - * int aggregate = BlockReduce::Reduce(smem_storage, data, num_elements); + * int aggregate = BlockReduce::Sum(smem_storage, data, num_elements); * * ... * \endcode @@ -296,63 +296,7 @@ private: public: - /******************************************************************//** - * \name Summation reductions - *********************************************************************/ - //@{ - - /** - * \brief Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes one input element. - * - * The return value is undefined in threads other than thread0. 
- * - * \smemreuse - */ - static __device__ __forceinline__ T Reduce( - SmemStorage &smem_storage, ///< [in] Shared reference to opaque SmemStorage layout - T input) ///< [in] Calling thread's input - { - Sum reduction_op; - return Reduce(smem_storage, input, reduction_op); - } - - /** - * \brief Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes an array of consecutive input elements. - * - * The return value is undefined in threads other than thread0. - * - * \smemreuse - * - * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. - */ - template - static __device__ __forceinline__ T Reduce( - SmemStorage &smem_storage, ///< [in] Shared reference to opaque SmemStorage layout - T (&inputs)[ITEMS_PER_THREAD]) ///< [in] Calling thread's input segment - { - Sum reduction_op; - return Reduce(smem_storage, inputs, reduction_op); - } - - - /** - * \brief Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. The first \p valid_threads threads each contribute one input element. - * - * \smemreuse - * - * The return value is undefined in threads other than thread0. 
- */ - static __device__ __forceinline__ T Reduce( - SmemStorage &smem_storage, ///< [in] Shared reference to opaque SmemStorage layout - T input, ///< [in] Calling thread's input - const unsigned int &valid_threads) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS) - { - Sum reduction_op; - return Reduce(smem_storage, input, valid_threads); - } - - //@} /******************************************************************//** * \name Generic reductions *********************************************************************/ @@ -430,7 +374,63 @@ public: } //@} + /******************************************************************//** + * \name Summation reductions + *********************************************************************/ + //@{ + + /** + * \brief Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes one input element. + * + * The return value is undefined in threads other than thread0. + * + * \smemreuse + */ + static __device__ __forceinline__ T Sum( + SmemStorage &smem_storage, ///< [in] Shared reference to opaque SmemStorage layout + T input) ///< [in] Calling thread's input + { + cub::Sum reduction_op; + return Reduce(smem_storage, input, reduction_op); + } + + /** + * \brief Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes an array of consecutive input elements. + * + * The return value is undefined in threads other than thread0. + * + * \smemreuse + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
+ */ + template + static __device__ __forceinline__ T Sum( + SmemStorage &smem_storage, ///< [in] Shared reference to opaque SmemStorage layout + T (&inputs)[ITEMS_PER_THREAD]) ///< [in] Calling thread's input segment + { + cub::Sum reduction_op; + return Reduce(smem_storage, inputs, reduction_op); + } + + /** + * \brief Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. The first \p valid_threads threads each contribute one input element. + * + * \smemreuse + * + * The return value is undefined in threads other than thread0. + */ + static __device__ __forceinline__ T Sum( + SmemStorage &smem_storage, ///< [in] Shared reference to opaque SmemStorage layout + T input, ///< [in] Calling thread's input + const unsigned int &valid_threads) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS) + { + cub::Sum reduction_op; + return Reduce(smem_storage, input, reduction_op, valid_threads); + } + + + //@} }; /** @} */ // end of SimtCoop group diff --git a/cub/cub.cuh b/cub/cub.cuh index 1e02cdc5b2..a047a76ecb 100644 --- a/cub/cub.cuh +++ b/cub/cub.cuh @@ -72,396 +72,3 @@ #include "vector_type.cuh" #include "allocator.cuh" - -/** - * \mainpage - * - * \tableofcontents - * - * \htmlonly - * - *    - * Download CUB! - *
- * - *    - * Browse or fork CUB at GitHub! - *
- * - *    - * Join the cub-users discussion forum! - * \endhtmlonly - * - * \section sec0 (1) What is CUB? - * - * \par - * CUB is a library of high-performance parallel primitives and other utilities for - * constructing CUDA kernel software. CUB enhances productivity, performance, and portability - * by providing an abstraction layer over complex - * [block-level] (http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programming-model), - * [warp-level] (http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#hardware-implementation), and - * [thread-level](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programming-model) operations. - * - * \par - * CUB's primitives are not bound to any particular width of parallelism or to any particular - * data type. This allows them to be flexible and tunable to fit your kernels' needs. - * Thus CUB is [CUDA Unbound](index.html). - * - * \image html cub_overview.png - * - * \par - * Browse our collections of: - * - [Cooperative primitives](group___simt_coop.html), including: - * - Thread block operations (e.g., radix sort, prefix scan, reduction, etc.) - * - Warp operations (e.g., prefix scan) - * - [SIMT utilities](group___simt_utils.html), including: - * - Tile-based I/O utilities (e.g., for performing {vectorized, coalesced} data movement of {blocked, striped} data tiles) - * - Low-level thread I/O using cache-modifiers - * - Abstractions for thread block work distribution (e.g., work-stealing, even-share, etc.) - * - [Host utilities](group___host_util.html), including: - * - Caching allocator for quick management of device temporaries - * - Device reflection - * - * \section sec2 (2) Recent news - * - * \par - * - [CUB v0.9 "preview" release](https://github.com/NVlabs/cub/archive/0.9.zip) (3/7/2013). CUB is the first durable, high-performance - * library of cooperative block-level, warp-level, and thread-level primitives for CUDA kernel - * programming. 
More primitives and examples coming soon! - * - * \section sec3 (3) A simple example - * - * \par - * The following code snippet illustrates a simple CUDA kernel for sorting a thread block's data: - * - * \par - * \code - * #include - * - * // An tile-sorting CUDA kernel - * template < - * int BLOCK_THREADS, // Threads per block - * int ITEMS_PER_THREAD, // Items per thread - * typename T> // Numeric data type - * __global__ void TileSortKernel(T *d_in, T *d_out) - * { - * using namespace cub; - * const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; - * - * // Parameterize cub::BlockRadixSort for the parallel execution context - * typedef BlockRadixSort BlockRadixSort; - * - * // Declare the shared memory needed by BlockRadixSort - * __shared__ typename BlockRadixSort::SmemStorage smem_storage; - * - * // A segment of data items per thread - * T data[ITEMS_PER_THREAD]; - * - * // Load a tile of data using vector-load instructions - * BlockLoadVectorized(data, d_in + (blockIdx.x * TILE_SIZE)); - * - * // Sort data in ascending order - * BlockRadixSort::SortBlocked(smem_storage, data); - * - * // Store the sorted tile using vector-store instructions - * BlockStoreVectorized(data, d_out + (blockIdx.x * TILE_SIZE)); - * } - * \endcode - * - * \par - * The cub::BlockRadixSort type performs a cooperative radix sort across the - * thread block's data items. Its implementation is parameterized by the number of threads per block and the aggregate - * data type \p T and is specialized for the underlying architecture. - * - * \par - * Once instantiated, the cub::BlockRadixSort type exposes an opaque cub::BlockRadixSort::SmemStorage - * member type. The thread block uses this storage type to allocate the shared memory needed by the - * primitive. This storage type can be aliased or union'd with other types so that the - * shared memory can be reused for other purposes. 
- * - * \par - * Furthermore, the kernel uses CUB's primitives for vectorizing global - * loads and stores. For example, lower-level ld.global.v4.s32 - * [PTX instructions](http://docs.nvidia.com/cuda/parallel-thread-execution) - * will be generated when \p T = \p int and \p ITEMS_PER_THREAD is a multiple of 4. - * - * \section sec4 (4) Why do you need CUB? - * - * \par - * CUDA kernel software is where the complexity of parallelism is expressed. - * Programmers must reason about deadlock, livelock, synchronization, race conditions, - * shared memory layout, plurality of state, granularity, throughput, latency, - * memory bottlenecks, etc. Constructing and fine-tuning kernel code is perhaps the - * most challenging, time-consuming aspect of CUDA programming. - * - * \par - * However, with the exception of CUB, there are few (if any) software libraries of - * reusable kernel primitives. In the CUDA ecosystem, CUB is unique in this regard. - * As a [SIMT](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#hardware-implementation) - * library and software abstraction layer, CUB provides: - * -# Simplicity of composition. Parallel CUB primitives can be simply sequenced - * together in kernel code. (This convenience is analogous to programming with - * [Thrust](http://thrust.github.com/) primitives in the host program.) - * -# High performance. CUB simplifies high performance kernel development by - * taking care to implement and make available the fastest available algorithms, - * strategies, and techniques. - * -# Performance portability. CUB primitives are specialized to match - * the target hardware. Furthermore, the CUB library continually evolves to accommodate new - * algorithmic developments, hardware instructions, etc. - * -# Simplicity of performance tuning. CUB primitives provide parallel abstractions - * whose performance behavior can be statically tuned. 
For example, most CUB primitives - * support alternative algorithmic strategies and variable grain sizes (threads per block, - * items per thread, etc.). - * -# Robustness and durability. CUB primitives are designed to function properly for - * arbitrary data types and widths of parallelism (not just for the built-in C++ types - * or for powers-of-two threads per block). - * - * \section sec5 (5) Where is CUB positioned in the CUDA ecosystem? - * - * \par - * CUDA's programming model embodies three different levels of program execution, each - * engendering its own abstraction layer in the CUDA software stack (i.e., the "black boxes" - * below): - * - * - * - * - * - * - * - * - * - * - *
- * \par - * CUDA kernel. A single CPU thread invokes a CUDA kernel to perform - * some data-parallel function. The incorporation of entire kernels (and their - * corresponding invocation stubs) into libraries is the most common form of code reuse for - * CUDA. Libraries of CUDA kernels include the following: - * - [cuBLAS](https://developer.nvidia.com/cublas) - * - [cuFFT](https://developer.nvidia.com/cufft) - * - [cuSPARSE](https://developer.nvidia.com/cusparse) - * - [Thrust](http://thrust.github.com/) - * - * \htmlonly - * - * \endhtmlonly - *
- * \par - * Thread blocks (SIMT). Each kernel invocation comprises some number of parallel - * threads. Threads are grouped into blocks, and the entire block of threads invokes some cooperative - * function in which they communicate and synchronize with each other. There has historically been very - * little reuse of cooperative SIMT software within CUDA kernel. Libraries of thread-block primitives - * include the following: - * - [CUB](index.html) - * - * \htmlonly - * - * \endhtmlonly - *
- * \par - * CUDA thread. A single CUDA thread invokes some sequential function. - * This is the finest-grained level of CUDA software abstraction and requires - * no consideration for the scheduling or synchronization of parallel threads. CUDA libraries of - * purely data-parallel functions include the following: - * - [ CUDA Math](http://docs.nvidia.com/cuda/cuda-math-api/index.html), - * [Texture](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#texture-functions), and - * [Atomic](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions) APIs - * - [cuRAND](https://developer.nvidia.com/curand)'s device-code interface - * - [CUB](index.html) - * - * \htmlonly - * - * \endhtmlonly - *
- * - * - * \section sec6 (6) How does CUB work? - * - * \par - * CUB leverages the following programming idioms: - * -# [C++ templates](index.html#sec3sec1) - * -# [Reflective type structure](index.html#sec3sec2) - * -# [Flexible data mapping](index.html#sec3sec3) - * - * \subsection sec3sec1 6.1    C++ templates - * - * \par - * As a SIMT library, CUB must be flexible enough to accommodate a wide spectrum - * of parallel execution contexts, - * i.e., specific: - * - Data types - * - Widths of parallelism (threads per block) - * - Grain sizes (data items per thread) - * - Underlying architectures (special instructions, warp size, rules for bank conflicts, etc.) - * - Tuning requirements (e.g., latency vs. throughput) - * - * \par - * To provide this flexibility, CUB is implemented as a C++ template library. - * C++ templates are a way to write generic algorithms and data structures. - * There is no need to build CUB separately. You simply \#include the - * cub.cuh header file into your .cu CUDA C++ sources - * and compile with NVIDIA's nvcc compiler. - * - * \subsection sec3sec2 6.2    Reflective type structure - * - * \par - * Cooperation within a thread block requires shared memory for communicating between threads. - * However, the specific size and layout of the memory needed by a given - * primitive will be specific to the details of its parallel execution context (e.g., how - * many threads are calling into it, how many items are processed per thread, etc.). Furthermore, - * this shared memory must be allocated outside of the component itself if it is to be - * reused elsewhere by the thread block. 
- * - * \par - * \code - * // Parameterize a BlockScan type for use with 128 threads - * // and 4 items per thread - * typedef cub::BlockScan BlockScan; - * - * // Declare shared memory for BlockScan - * __shared__ typename BlockScan::SmemStorage smem_storage; - * - * // A segment of consecutive input items per thread - * int data[4]; - * - * // Obtain data in blocked order - * ... - * - * // Perform an exclusive prefix sum across the tile of data - * BlockScan::ExclusiveSum(smem_storage, data, data); - * - * \endcode - * - * \par - * To address this issue, we encapsulate cooperative procedures within - * reflective type structure (C++ classes). As illustrated in the - * cub::BlockScan example above, these primitives are C++ classes with - * interfaces that expose both: - * - Procedural entrypoints for a block of threads to invoke - * - An opaque shared memory type needed for the operation of those methods - * - * \subsection sec3sec3 6.3    Flexible data mapping - * - * \par - * We often design kernels such that each thread block is assigned a "tile" of data - * items for processing. - * - * \par - * \image html tile.png - *
Tile of eight ordered data items
- - * \par - * When the tile size equals the thread block size, the - * mapping of data onto threads is straightforward (one datum per thread). - * However, there are often performance advantages for processing more - * than one datum per thread. For these scenarios, CUB primitives - * support the following alternatives for partitioning data items across - * the block of threads: - * - * - * - * - * - * - * - * - *
- * \par - * - Blocked arrangement. The aggregate tile of items is partitioned - * evenly across threads in "blocked" fashion with threadi - * owning the ith segment of consecutive elements. - * Blocked arrangements are often desirable for algorithmic benefits (where - * long sequences of items can be processed sequentially within each thread). - * - * \par - * \image html blocked.png - *
Blocked arrangement across four threads
(emphasis on items owned by thread0)
- *
- * \par - * - Striped arrangement. The aggregate tile of items is partitioned across - * threads in "striped" fashion, i.e., the \p ITEMS_PER_THREAD items owned by - * each thread have logical stride \p BLOCK_THREADS between them. Striped arrangements - * are often desirable for data movement through global memory (where - * [read/write coalescing](http://docs.nvidia.com/cuda/cuda-c-best-practices-guide/#coalesced-access-global-memory) - * is an important performance consideration). - * - * \par - * \image html striped.png - *
Striped arrangement across four threads
(emphasis on items owned by thread0)
- *
- * - * \par - * The benefits of processing multiple items per thread (a.k.a., register blocking, granularity coarsening, etc.) include: - * - Algorithmic efficiency. Sequential work over multiple items in - * thread-private registers is cheaper than synchronized, cooperative - * work through shared memory spaces. - * - Data occupancy. The number of items that can be resident on-chip in - * thread-private register storage is often greater than the number of - * schedulable threads. - * - Instruction-level parallelism. Multiple items per thread also - * facilitates greater ILP for improved throughput and utilization. - * - * \par - * Finally, cub::BlockExchange provides operations for converting between blocked - * and striped arrangements. - * - * \section sec7 (7) Contributors - * - * \par - * CUB is developed as an open-source project by [NVIDIA Research](http://research.nvidia.com). - * The primary contributor is [Duane Merrill](http://github.com/dumerrill). - * - * \section sec8 (8) Open Source License - * - * \par - * CUB is available under the "New BSD" open-source license: - * - * \par - * \code - * Copyright (c) 2011, Duane Merrill. All rights reserved. - * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the NVIDIA CORPORATION nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * \endcode - * - */ - - -/** - * \defgroup Simt SIMT Primitives - */ - -/** - * \defgroup SimtCoop Cooperative SIMT Operations - * \ingroup Simt - */ - -/** - * \defgroup SimtUtils SIMT Utilities - * \ingroup Simt - */ - -/** - * \defgroup HostUtil Host Utilities - */ diff --git a/cub/docs/.gitignore b/docs/.gitignore similarity index 70% rename from cub/docs/.gitignore rename to docs/.gitignore index 58a12a5a3e..4054350939 100644 --- a/cub/docs/.gitignore +++ b/docs/.gitignore @@ -1,2 +1 @@ -/html /citelist.doc diff --git a/cub/docs/Doxyfile b/docs/Doxyfile similarity index 96% rename from cub/docs/Doxyfile rename to docs/Doxyfile index f31b2fbcd1..2f98a87990 100644 --- a/cub/docs/Doxyfile +++ b/docs/Doxyfile @@ -664,7 +664,20 @@ WARN_LOGFILE = # directories like "/usr/src/myproject". Separate the files or directories # with spaces. 
-INPUT = ../cub.cuh ../debug.cuh ../warp/warp_scan.cuh ../block/block_reduce.cuh ../block/block_scan.cuh ../block/block_radix_sort.cuh ../block/block_load.cuh ../block/block_store.cuh ../block/block_exchange.cuh ../block/block_discontinuity.cuh ../operators.cuh ../type_utils.cuh ../thread/thread_load.cuh ../thread/thread_store.cuh +INPUT = mainpage.dox +INPUT += ../cub/operators.cuh +INPUT += ../cub/type_utils.cuh +INPUT += ../cub/debug.cuh +INPUT += ../cub/warp/warp_scan.cuh +INPUT += ../cub/block/block_reduce.cuh +INPUT += ../cub/block/block_scan.cuh +INPUT += ../cub/block/block_radix_sort.cuh +INPUT += ../cub/block/block_load.cuh +INPUT += ../cub/block/block_store.cuh +INPUT += ../cub/block/block_exchange.cuh +INPUT += ../cub/block/block_discontinuity.cuh +INPUT += ../cub/thread/thread_load.cuh +INPUT += ../cub/thread/thread_store.cuh # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is @@ -927,7 +940,16 @@ HTML_EXTRA_STYLESHEET = extra_stylesheet.css # files. In the HTML_STYLESHEET file, use the file name only. Also note that # the files will be copied as-is; there are no commands or markers available. 
-HTML_EXTRA_FILES = images/download-icon.png images/groups-icon.png images/github-icon-747d8b799a48162434b2c0595ba1317e.png images/favicon.ico images/favicon.png images/tab_b_alt.png images/simt_abstraction.png images/kernel_abstraction.png images/devfun_abstraction.png +HTML_EXTRA_FILES = download_cub.html +HTML_EXTRA_FILES += images/download-icon.png +HTML_EXTRA_FILES += images/groups-icon.png +HTML_EXTRA_FILES += images/github-icon-747d8b799a48162434b2c0595ba1317e.png +HTML_EXTRA_FILES += images/favicon.ico +HTML_EXTRA_FILES += images/favicon.png +HTML_EXTRA_FILES += images/tab_b_alt.png +HTML_EXTRA_FILES += images/simt_abstraction.png +HTML_EXTRA_FILES += images/kernel_abstraction.png +HTML_EXTRA_FILES += images/devfun_abstraction.png # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. # Doxygen will adjust the colors in the style sheet and background images diff --git a/cub/docs/DoxygenLayout.xml b/docs/DoxygenLayout.xml similarity index 100% rename from cub/docs/DoxygenLayout.xml rename to docs/DoxygenLayout.xml diff --git a/docs/download_cub.html b/docs/download_cub.html new file mode 100644 index 0000000000..5fe0be4517 --- /dev/null +++ b/docs/download_cub.html @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + +
+If your download doesn't start in 3s: +

+ +Download CUB! +
+ + + \ No newline at end of file diff --git a/cub/docs/extra_stylesheet.css b/docs/extra_stylesheet.css similarity index 100% rename from cub/docs/extra_stylesheet.css rename to docs/extra_stylesheet.css diff --git a/cub/docs/footer.html b/docs/footer.html similarity index 100% rename from cub/docs/footer.html rename to docs/footer.html diff --git a/cub/docs/header.html b/docs/header.html similarity index 100% rename from cub/docs/header.html rename to docs/header.html diff --git a/docs/html/annotated.html b/docs/html/annotated.html new file mode 100644 index 0000000000..3e5e92c027 --- /dev/null +++ b/docs/html/annotated.html @@ -0,0 +1,171 @@ + + + + + + + +CUB: Class List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + +
+ + + + +
+ +
+ +
+
+
Class List
+
+
+
Here are the classes, structs, unions and interfaces with brief descriptions:
+
[detail level 12]
+ + + + + + + + + + + + + + + + + + + + + + + +
\NcubCUB namespace
 oCArrayTraitsArray traits
 oCBaseTraitsBasic type traits
 oCBlockDiscontinuityBlockDiscontinuity provides operations for flagging discontinuities within a list of data items partitioned across a CUDA threadblock.

+
+discont_logo.png +
+
 oCBlockExchangeBlockExchange provides operations for reorganizing the partitioning of ordered data across a CUDA threadblock.

+
+transpose_logo.png +
+
 oCBlockLoadBlockLoad provides data movement operations for reading block-arranged data from global memory.

+
+block_load_logo.png +
+
 oCBlockRadixSortBlockRadixSort provides variants of parallel radix sorting across a CUDA threadblock.

+
+sorting_logo.png +
+
 oCBlockReduceBlockReduce provides variants of parallel reduction across a CUDA threadblock.

+
+reduce_logo.png +
+
 oCBlockScanBlockScan provides variants of parallel prefix scan (and prefix sum) across a CUDA threadblock.

+
+scan_logo.png +
+
 oCBlockStoreBlockStore provides data movement operations for writing block-arranged data to global memory.

+
+block_store_logo.png +
+
 oCEnableIfSimple enable-if (similar to Boost)
 oCEqualityDefault equality functor
 oCEqualsType equality test
 oCIfType selection (IF ? ThenType : ElseType)
 oCIsVolatileVolatile modifier test
 oCLog2Statically determine log2(N), rounded up
 oCMaxDefault max functor
 oCNullTypeA simple "NULL" marker type
 oCNumericTraitsNumeric type traits
 oCRemoveQualifiersRemoves const and volatile qualifiers from type Tp
 oCSumDefault sum functor
 oCTraitsType traits
 \CWarpScanWarpScan provides variants of parallel prefix scan across a CUDA warp.

+
+warp_scan_logo.png +
+
+
+
+ + + + + diff --git a/docs/html/bc_s.png b/docs/html/bc_s.png new file mode 100644 index 0000000000..224b29aa98 Binary files /dev/null and b/docs/html/bc_s.png differ diff --git a/docs/html/bdwn.png b/docs/html/bdwn.png new file mode 100644 index 0000000000..940a0b9504 Binary files /dev/null and b/docs/html/bdwn.png differ diff --git a/docs/html/block__discontinuity_8cuh.html b/docs/html/block__discontinuity_8cuh.html new file mode 100644 index 0000000000..87851ed8c7 --- /dev/null +++ b/docs/html/block__discontinuity_8cuh.html @@ -0,0 +1,136 @@ + + + + + + + +CUB: block_discontinuity.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
block_discontinuity.cuh File Reference
+
+
+
#include <cuda_runtime.h>
+#include "../device_props.cuh"
+#include "../type_utils.cuh"
+#include "../operators.cuh"
+#include "../ns_wrapper.cuh"
+
+ + + + +

+Classes

class  cub::BlockDiscontinuity< T, BLOCK_THREADS >
 BlockDiscontinuity provides operations for flagging discontinuities within a list of data items partitioned across a CUDA threadblock.

+
+discont_logo.png +
+.
+ More...
 
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+

Detailed Description

+

cub::BlockDiscontinuity provides operations for flagging discontinuities within a list of data items partitioned across a CUDA threadblock.

+
+ + + + + diff --git a/docs/html/block__exchange_8cuh.html b/docs/html/block__exchange_8cuh.html new file mode 100644 index 0000000000..192a7752d1 --- /dev/null +++ b/docs/html/block__exchange_8cuh.html @@ -0,0 +1,135 @@ + + + + + + + +CUB: block_exchange.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
block_exchange.cuh File Reference
+
+
+
#include "../ns_wrapper.cuh"
+#include "../device_props.cuh"
+#include "../ptx_intrinsics.cuh"
+#include "../type_utils.cuh"
+
+ + + + +

+Classes

class  cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >
 BlockExchange provides operations for reorganizing the partitioning of ordered data across a CUDA threadblock.

+
+transpose_logo.png +
+.
+ More...
 
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+

Detailed Description

+

cub::BlockExchange provides operations for reorganizing the partitioning of ordered data across a CUDA threadblock.

+
+ + + + + diff --git a/docs/html/block__load_8cuh.html b/docs/html/block__load_8cuh.html new file mode 100644 index 0000000000..8fbc5e018c --- /dev/null +++ b/docs/html/block__load_8cuh.html @@ -0,0 +1,210 @@ + + + + + + + +CUB: block_load.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
block_load.cuh File Reference
+
+
+
#include <iterator>
+#include "../ns_wrapper.cuh"
+#include "../macro_utils.cuh"
+#include "../thread/thread_load.cuh"
+#include "../type_utils.cuh"
+#include "../vector_type.cuh"
+#include "block_exchange.cuh"
+
+ + + + +

+Classes

class  cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >
 BlockLoad provides data movement operations for reading block-arranged data from global memory.

+
+block_load_logo.png +
+.
+ More...
 
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+ + + + +

+Enumerations

enum  cub::BlockLoadPolicy { cub::BLOCK_LOAD_DIRECT, +cub::BLOCK_LOAD_VECTORIZE, +cub::BLOCK_LOAD_TRANSPOSE + }
 Tuning policy for cub::BlockLoad. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Functions

Direct threadblock loads (blocked arrangement)
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier, guarded by range. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly, guarded by range. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier, guarded by range, with assignment for out-of-bound elements. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly, guarded by range, with assignment for out-of-bound elements. More...
 
Direct threadblock loads (striped arrangement)
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly using the specified cache modifier, guarded by range. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly, guarded by range. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly using the specified cache modifier, guarded by range, with assignment for out-of-bound elements. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly, guarded by range, with assignment for out-of-bound elements. More...
 
Threadblock vectorized loads (blocked arrangement)
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void cub::BlockLoadVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void cub::BlockLoadVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly. More...
 
+

Detailed Description

+

Operations for reading global tiles of data into the threadblock (in blocked arrangement across threads).

+
+ + + + + diff --git a/docs/html/block__radix__sort_8cuh.html b/docs/html/block__radix__sort_8cuh.html new file mode 100644 index 0000000000..322dfc139a --- /dev/null +++ b/docs/html/block__radix__sort_8cuh.html @@ -0,0 +1,136 @@ + + + + + + + +CUB: block_radix_sort.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
block_radix_sort.cuh File Reference
+
+
+
#include "../ns_wrapper.cuh"
+#include "../device_props.cuh"
+#include "../type_utils.cuh"
+#include "block_exchange.cuh"
+#include "block_radix_rank.cuh"
+
+ + + + +

+Classes

class  cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >
 BlockRadixSort provides variants of parallel radix sorting across a CUDA threadblock.

+
+sorting_logo.png +
+.
+ More...
 
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+

Detailed Description

+

cub::BlockRadixSort provides variants of parallel radix sorting across a CUDA threadblock.

+
+ + + + + diff --git a/docs/html/block__reduce_8cuh.html b/docs/html/block__reduce_8cuh.html new file mode 100644 index 0000000000..262b923b11 --- /dev/null +++ b/docs/html/block__reduce_8cuh.html @@ -0,0 +1,138 @@ + + + + + + + +CUB: block_reduce.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
block_reduce.cuh File Reference
+
+
+
#include "../block/block_raking_grid.cuh"
+#include "../device_props.cuh"
+#include "../operators.cuh"
+#include "../thread/thread_reduce.cuh"
+#include "../thread/thread_load.cuh"
+#include "../thread/thread_store.cuh"
+#include "../ns_wrapper.cuh"
+
+ + + + +

+Classes

class  cub::BlockReduce< T, BLOCK_THREADS >
 BlockReduce provides variants of parallel reduction across a CUDA threadblock.

+
+reduce_logo.png +
+.
+ More...
 
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+

Detailed Description

+

cub::BlockReduce provides variants of parallel reduction across a CUDA threadblock.

+
+ + + + + diff --git a/docs/html/block__scan_8cuh.html b/docs/html/block__scan_8cuh.html new file mode 100644 index 0000000000..591a6077bc --- /dev/null +++ b/docs/html/block__scan_8cuh.html @@ -0,0 +1,147 @@ + + + + + + + +CUB: block_scan.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
block_scan.cuh File Reference
+
+
+
#include "../device_props.cuh"
+#include "../type_utils.cuh"
+#include "../operators.cuh"
+#include "../warp/warp_scan.cuh"
+#include "../thread/thread_reduce.cuh"
+#include "../thread/thread_scan.cuh"
+#include "../ns_wrapper.cuh"
+
+ + + + +

+Classes

class  cub::BlockScan< T, BLOCK_THREADS, POLICY >
 BlockScan provides variants of parallel prefix scan (and prefix sum) across a CUDA threadblock.

+
+scan_logo.png +
+.
+ More...
 
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+ + + + +

+Enumerations

enum  cub::BlockScanPolicy { cub::BLOCK_SCAN_RAKING, +cub::BLOCK_SCAN_WARPSCANS + }
 Tuning policy for cub::BlockScan. More...
 
+

Detailed Description

+

cub::BlockScan provides variants of parallel prefix scan across a CUDA threadblock.

+
+ + + + + diff --git a/docs/html/block__store_8cuh.html b/docs/html/block__store_8cuh.html new file mode 100644 index 0000000000..ac9e2843a4 --- /dev/null +++ b/docs/html/block__store_8cuh.html @@ -0,0 +1,193 @@ + + + + + + + +CUB: block_store.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
block_store.cuh File Reference
+
+
+
#include <iterator>
+#include "../ns_wrapper.cuh"
+#include "../macro_utils.cuh"
+#include "../thread/thread_store.cuh"
+#include "../type_utils.cuh"
+#include "../vector_type.cuh"
+#include "block_exchange.cuh"
+
+ + + + +

+Classes

class  cub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >
 BlockStore provides data movement operations for writing blocked-arranged data to global memory.

+
+block_store_logo.png +
+.
+ More...
 
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+ + + + +

+Enumerations

enum  cub::BlockStorePolicy { cub::BLOCK_STORE_DIRECT, +cub::BLOCK_STORE_VECTORIZE, +cub::BLOCK_STORE_TRANSPOSE + }
 Tuning policy for cub::BlockStore. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Functions

Direct threadblock stores (blocked arrangement)
template<PtxStoreModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename OutputIterator >
__device__ __forceinline__ void cub::BlockStoreDirect (OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIterator >
__device__ __forceinline__ void cub::BlockStoreDirect (OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly. More...
 
template<PtxStoreModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename OutputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockStoreDirect (OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly using the specified cache modifier, guarded by range. More...
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockStoreDirect (OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly, guarded by range. More...
 
Direct threadblock stores (striped arrangement)
template<PtxStoreModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename OutputIterator >
__device__ __forceinline__ void cub::BlockStoreDirectStriped (OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Store striped tile directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIterator >
__device__ __forceinline__ void cub::BlockStoreDirectStriped (OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Store striped tile directly. More...
 
template<PtxStoreModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename OutputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockStoreDirectStriped (OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIterator , typename SizeT >
__device__ __forceinline__ void cub::BlockStoreDirectStriped (OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Store striped tile directly, guarded by range. More...
 
Threadblock vectorized stores (blocked arrangement)
template<PtxStoreModifier MODIFIER, typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void cub::BlockStoreVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void cub::BlockStoreVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly. More...
 
+

Detailed Description

+

Operations for writing global tiles of data from the threadblock (in blocked arrangement across threads).

+
+ + + + + diff --git a/cub/docs/images/block_load_logo.png b/docs/html/block_load_logo.png similarity index 100% rename from cub/docs/images/block_load_logo.png rename to docs/html/block_load_logo.png diff --git a/cub/docs/images/block_reduce.png b/docs/html/block_reduce.png similarity index 100% rename from cub/docs/images/block_reduce.png rename to docs/html/block_reduce.png diff --git a/cub/docs/images/block_scan_raking.png b/docs/html/block_scan_raking.png similarity index 100% rename from cub/docs/images/block_scan_raking.png rename to docs/html/block_scan_raking.png diff --git a/cub/docs/images/block_scan_warpscans.png b/docs/html/block_scan_warpscans.png similarity index 100% rename from cub/docs/images/block_scan_warpscans.png rename to docs/html/block_scan_warpscans.png diff --git a/cub/docs/images/block_store_logo.png b/docs/html/block_store_logo.png similarity index 100% rename from cub/docs/images/block_store_logo.png rename to docs/html/block_store_logo.png diff --git a/cub/docs/images/blocked.png b/docs/html/blocked.png similarity index 100% rename from cub/docs/images/blocked.png rename to docs/html/blocked.png diff --git a/docs/html/citelist.html b/docs/html/citelist.html new file mode 100644 index 0000000000..7cb5cdf4e4 --- /dev/null +++ b/docs/html/citelist.html @@ -0,0 +1,110 @@ + + + + + + + +CUB: Bibliographic References + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ +
+
+
+
Bibliographic References
+
+
+
+
[1]
+

Duane Merrill and Andrew Grimshaw. High performance and scalable radix sorting: A case study of implementing dynamic parallelism for GPU computing. Parallel Processing Letters, 21(02):245–272, 2011.

+

+
+
+
+ + + + + diff --git a/docs/html/classcub_1_1_block_discontinuity-members.html b/docs/html/classcub_1_1_block_discontinuity-members.html new file mode 100644 index 0000000000..63af424a23 --- /dev/null +++ b/docs/html/classcub_1_1_block_discontinuity-members.html @@ -0,0 +1,124 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::BlockDiscontinuity< T, BLOCK_THREADS > Member List
+
+
+ +

This is the complete list of members for cub::BlockDiscontinuity< T, BLOCK_THREADS >, including all inherited members.

+ + + + + + +
Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD], T &last_tile_item)cub::BlockDiscontinuity< T, BLOCK_THREADS >inlinestatic
Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD])cub::BlockDiscontinuity< T, BLOCK_THREADS >inlinestatic
Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T tile_predecessor, FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD], T &last_tile_item)cub::BlockDiscontinuity< T, BLOCK_THREADS >inlinestatic
Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T tile_predecessor, FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD])cub::BlockDiscontinuity< T, BLOCK_THREADS >inlinestatic
SmemStorage typedefcub::BlockDiscontinuity< T, BLOCK_THREADS >
+ + + + + diff --git a/docs/html/classcub_1_1_block_discontinuity.html b/docs/html/classcub_1_1_block_discontinuity.html new file mode 100644 index 0000000000..9526319660 --- /dev/null +++ b/docs/html/classcub_1_1_block_discontinuity.html @@ -0,0 +1,548 @@ + + + + + + + +CUB: cub::BlockDiscontinuity< T, BLOCK_THREADS > Class Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::BlockDiscontinuity< T, BLOCK_THREADS > Class Template Reference
+
+
+

Detailed description

+

template<typename T, int BLOCK_THREADS>
+class cub::BlockDiscontinuity< T, BLOCK_THREADS >

+ +

BlockDiscontinuity provides operations for flagging discontinuities within a list of data items partitioned across a CUDA threadblock.

+
+discont_logo.png +
+.
+
Overview
The operations exposed by BlockDiscontinuity allow threadblocks to set "head flags" for data elements that are different from their predecessor (as specified by a binary boolean operator). Head flags are often useful for orchestrating segmented scans and reductions.
+
For convenience, BlockDiscontinuity exposes a spectrum of entrypoints that differ by:
    +
  • How the first item is handled (always-flagged vs. compared to a specific block-wide predecessor)
  • +
  • Output (discontinuity flags only vs. discontinuity flags and a copy of the last tile item for thread0)
  • +
+
+
Template Parameters
+ + + +
TThe data type to be exchanged.
BLOCK_THREADSThe threadblock size in threads.
+
+
+
Usage Considerations
    +
  • Assumes a blocked arrangement of elements across threads
  • +
  • Any threadblock-wide scalar inputs and outputs (e.g., tile_predecessor and last_tile_item) are only considered valid in thread0
  • +
  • After any operation, a subsequent __syncthreads() barrier is required if the supplied BlockDiscontinuity::SmemStorage is to be reused or repurposed by the threadblock
  • +
+
+
Performance Considerations
    +
  • Zero bank conflicts for most types.
  • +
+
+
Examples
Example 1. Given a tile of 512 non-zero matrix coordinates (ordered by row) in a blocked arrangement across a 128-thread threadblock, flag the first coordinate element of each row.
#include <cub.cuh>
+
+
struct NonZero
+
{
+
int row;
+
int col;
+
float val;
+
};
+
+
struct NewRowOp
+
{
+
__device__ __forceinline__ bool operator()(
+
const NonZero& a,
+
const NonZero& b)
+
{
+
return (a.row != b.row);
+
}
+
};
+
+
__global__ void SomeKernel(...)
+
{
+
// Parameterize BlockDiscontinuity for the parallel execution context
+
typedef cub::BlockDiscontinuity<NonZero, 128> BlockDiscontinuity;
+
+
// Declare shared memory for BlockDiscontinuity
+
__shared__ typename BlockDiscontinuity::SmemStorage smem_storage;
+
+
// A segment of consecutive non-zeroes per thread
+
NonZero coordinates[4];
+
+
// Obtain items in blocked order
+
...
+
+
// Obtain the last item of the previous tile
+
NonZero block_predecessor;
+
if (threadIdx.x == 0)
+
{
+
block_predecessor = ...
+
}
+
+
// Set head flags
+
int head_flags[4];
+
BlockDiscontinuity::Flag(smem_storage, coordinates, block_predecessor, NewRowOp(), head_flags);
+
+
+ + + + +

+Public Types

+typedef _SmemStorage SmemStorage
 The operations exposed by BlockDiscontinuity require shared memory of this type. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated shared memory or union'd with other types to facilitate shared memory reuse.
 
+ + + + + + + + + + + + + + + + + +

+Static Public Methods

template<int ITEMS_PER_THREAD, typename FlagT , typename FlagOp >
static __device__
+__forceinline__ void 
Flag (SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD], T &last_tile_item)
 Sets discontinuity flags for a tile of threadblock items, for which the first item has no reference (and is always flagged). The last tile item of the last thread is also returned to thread0. More...
 
template<int ITEMS_PER_THREAD, typename FlagT , typename FlagOp >
static __device__
+__forceinline__ void 
Flag (SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD])
 Sets discontinuity flags for a tile of threadblock items, for which the first item has no reference (and is always flagged). More...
 
template<int ITEMS_PER_THREAD, typename FlagT , typename FlagOp >
static __device__
+__forceinline__ void 
Flag (SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T tile_predecessor, FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD], T &last_tile_item)
 Sets discontinuity flags for a tile of threadblock items. The last tile item of the last thread is also returned to thread0. More...
 
template<int ITEMS_PER_THREAD, typename FlagT , typename FlagOp >
static __device__
+__forceinline__ void 
Flag (SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T tile_predecessor, FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD])
 Sets discontinuity flags for a tile of threadblock items. More...
 
+

Member Function Documentation

+ +
+
+
+template<typename T , int BLOCK_THREADS>
+
+template<int ITEMS_PER_THREAD, typename FlagT , typename FlagOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockDiscontinuity< T, BLOCK_THREADS >::Flag (SmemStoragesmem_storage,
T(&) input[ITEMS_PER_THREAD],
FlagOp flag_op,
FlagT(&) flags[ITEMS_PER_THREAD],
T & last_tile_item 
)
+
+inlinestatic
+
+ +

Sets discontinuity flags for a tile of threadblock items, for which the first item has no reference (and is always flagged). The last tile item of the last thread is also returned to thread0.

+

Assuming a blocked arrangement of elements across threads, flagsi is set non-zero for item inputi when scan_op(previous-item, inputi) is true (where previous-item is either inputi-1, or inputITEMS_PER_THREAD-1 in the previous thread). Furthermore, flagsi is always non-zero for input0 in thread0.

+

The last_tile_item is undefined in threads other than thread0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + + + +
ITEMS_PER_THREAD[inferred] The number of consecutive items partitioned onto each thread.
FlagT[inferred] The flag type (must be an integer type)
FlagOp[inferred] Binary boolean functor type, having input parameters (const T &a, const T &b) and returning true if a discontinuity exists between a and b, otherwise false.
+
+
+
Parameters
+ + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputInput items
[in]flag_opBinary boolean flag predicate
[out]flagsDiscontinuity flags
[out]last_tile_item[thread0 only] The last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1)
+
+
+ +
+
+ +
+
+
+template<typename T , int BLOCK_THREADS>
+
+template<int ITEMS_PER_THREAD, typename FlagT , typename FlagOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockDiscontinuity< T, BLOCK_THREADS >::Flag (SmemStoragesmem_storage,
T(&) input[ITEMS_PER_THREAD],
FlagOp flag_op,
FlagT(&) flags[ITEMS_PER_THREAD] 
)
+
+inlinestatic
+
+ +

Sets discontinuity flags for a tile of threadblock items, for which the first item has no reference (and is always flagged).

+

Assuming a blocked arrangement of elements across threads, flagsi is set non-zero for item inputi when scan_op(previous-item, inputi) is true (where previous-item is either inputi-1, or inputITEMS_PER_THREAD-1 in the previous thread). Furthermore, flagsi is always non-zero for input0 in thread0.

+

The last_tile_item is undefined in threads other than thread0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + + + +
ITEMS_PER_THREAD[inferred] The number of consecutive items partitioned onto each thread.
FlagT[inferred] The flag type (must be an integer type)
FlagOp[inferred] Binary boolean functor type, having input parameters (const T &a, const T &b) and returning true if a discontinuity exists between a and b, otherwise false.
+
+
+
Parameters
+ + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputInput items
[in]flag_opBinary boolean flag predicate
[out]flagsDiscontinuity flags
+
+
+ +
+
+ +
+
+
+template<typename T , int BLOCK_THREADS>
+
+template<int ITEMS_PER_THREAD, typename FlagT , typename FlagOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockDiscontinuity< T, BLOCK_THREADS >::Flag (SmemStoragesmem_storage,
T(&) input[ITEMS_PER_THREAD],
tile_predecessor,
FlagOp flag_op,
FlagT(&) flags[ITEMS_PER_THREAD],
T & last_tile_item 
)
+
+inlinestatic
+
+ +

Sets discontinuity flags for a tile of threadblock items. The last tile item of the last thread is also returned to thread0.

+

Assuming a blocked arrangement of elements across threads, flagsi is set non-zero for item inputi when scan_op(previous-item, inputi) is true (where previous-item is either inputi-1, or inputITEMS_PER_THREAD-1 in the previous thread). For thread0, item input0 is compared against tile_predecessor.

+

The tile_predecessor and last_tile_item are undefined in threads other than thread0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + + + +
ITEMS_PER_THREAD[inferred] The number of consecutive items partitioned onto each thread.
FlagT[inferred] The flag type (must be an integer type)
FlagOp[inferred] Binary boolean functor type, having input parameters (const T &a, const T &b) and returning true if a discontinuity exists between a and b, otherwise false.
+
+
+
Parameters
+ + + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputInput items
[in]tile_predecessor[thread0 only] Item with which to compare the first tile item (input0from thread0).
[in]flag_opBinary boolean flag predicate
[out]flagsDiscontinuity flags
[out]last_tile_item[thread0 only] The last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1)
+
+
+ +
+
+ +
+
+
+template<typename T , int BLOCK_THREADS>
+
+template<int ITEMS_PER_THREAD, typename FlagT , typename FlagOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockDiscontinuity< T, BLOCK_THREADS >::Flag (SmemStoragesmem_storage,
T(&) input[ITEMS_PER_THREAD],
tile_predecessor,
FlagOp flag_op,
FlagT(&) flags[ITEMS_PER_THREAD] 
)
+
+inlinestatic
+
+ +

Sets discontinuity flags for a tile of threadblock items.

+

Assuming a blocked arrangement of elements across threads, flagsi is set non-zero for item inputi when scan_op(previous-item, inputi) is true (where previous-item is either inputi-1, or inputITEMS_PER_THREAD-1 in the previous thread). For thread0, item input0 is compared against tile_predecessor.

+

The tile_predecessor and last_tile_item are undefined in threads other than thread0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + + + +
ITEMS_PER_THREAD[inferred] The number of consecutive items partitioned onto each thread.
FlagT[inferred] The flag type (must be an integer type)
FlagOp[inferred] Binary boolean functor type, having input parameters (const T &a, const T &b) and returning true if a discontinuity exists between a and b, otherwise false.
+
+
+
Parameters
+ + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputInput items
[in]tile_predecessor[thread0 only] Item with which to compare the first tile item (input0from thread0).
[in]flag_opBinary boolean flag predicate
[out]flagsDiscontinuity flags
+
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + + diff --git a/docs/html/classcub_1_1_block_exchange-members.html b/docs/html/classcub_1_1_block_exchange-members.html new file mode 100644 index 0000000000..57bee0babe --- /dev/null +++ b/docs/html/classcub_1_1_block_exchange-members.html @@ -0,0 +1,124 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD > Member List
+
+
+ +

This is the complete list of members for cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >, including all inherited members.

+ + + + + + +
BlockedToStriped(SmemStorage &smem_storage, T items[ITEMS_PER_THREAD])cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >inlinestatic
ScatterToBlocked(SmemStorage &smem_storage, T items[ITEMS_PER_THREAD], unsigned int ranks[ITEMS_PER_THREAD])cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >inlinestatic
ScatterToStriped(SmemStorage &smem_storage, T items[ITEMS_PER_THREAD], unsigned int ranks[ITEMS_PER_THREAD])cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >inlinestatic
SmemStorage typedefcub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >
StripedToBlocked(SmemStorage &smem_storage, T items[ITEMS_PER_THREAD])cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >inlinestatic
+ + + + + diff --git a/docs/html/classcub_1_1_block_exchange.html b/docs/html/classcub_1_1_block_exchange.html new file mode 100644 index 0000000000..422b16d501 --- /dev/null +++ b/docs/html/classcub_1_1_block_exchange.html @@ -0,0 +1,386 @@ + + + + + + + +CUB: cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD > Class Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD > Class Template Reference
+
+
+

Detailed description

+

template<typename T, int BLOCK_THREADS, int ITEMS_PER_THREAD>
+class cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >

+ +

BlockExchange provides operations for reorganizing the partitioning of ordered data across a CUDA threadblock.

+
+transpose_logo.png +
+.
+
Overview
BlockExchange allows threadblocks to reorganize data items between threads. More specifically, BlockExchange supports the following types of data exchanges: +
+
Template Parameters
+ + + + +
TThe data type to be exchanged.
BLOCK_THREADSThe threadblock size in threads.
ITEMS_PER_THREADThe number of items partitioned onto each thread.
+
+
+
Algorithm
Threads scatter items by item-order into shared memory, allowing one item of padding for every memory bank's worth of items. After a barrier, items are gathered in the desired arrangement.
+raking.png +
+
A threadblock of 16 threads reading a blocked arrangement of 64 items in a parallel "raking" fashion.
+
Usage Considerations
    +
  • After any operation, a subsequent __syncthreads() barrier is required if the supplied BlockExchange::SmemStorage is to be reused or repurposed by the threadblock
  • +
+
+
Performance Considerations
    +
  • Proper device-specific padding ensures zero bank conflicts for most types.
  • +
+
+
+ + + + +

+Public Types

+typedef SmemStorage SmemStorage
 The operations exposed by BlockExchange require shared memory of this type. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated shared memory or union'd with other types to facilitate shared memory reuse.
 
+ + + + + + + + + + + + + + + +

+Static Public Methods

Transpose exchanges
static __device__
+__forceinline__ void 
BlockedToStriped (SmemStorage &smem_storage, T items[ITEMS_PER_THREAD])
 Transposes data items from blocked arrangement to striped arrangement. More...
 
static __device__
+__forceinline__ void 
StripedToBlocked (SmemStorage &smem_storage, T items[ITEMS_PER_THREAD])
 Transposes data items from striped arrangement to blocked arrangement. More...
 
Scatter exchanges
static __device__
+__forceinline__ void 
ScatterToBlocked (SmemStorage &smem_storage, T items[ITEMS_PER_THREAD], unsigned int ranks[ITEMS_PER_THREAD])
 Exchanges data items annotated by rank into blocked arrangement. More...
 
static __device__
+__forceinline__ void 
ScatterToStriped (SmemStorage &smem_storage, T items[ITEMS_PER_THREAD], unsigned int ranks[ITEMS_PER_THREAD])
 Exchanges data items annotated by rank into striped arrangement. More...
 
+

Member Function Documentation

+ +
+
+
+template<typename T , int BLOCK_THREADS, int ITEMS_PER_THREAD>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >::BlockedToStriped (SmemStoragesmem_storage,
items[ITEMS_PER_THREAD] 
)
+
+inlinestatic
+
+ +

Transposes data items from blocked arrangement to striped arrangement.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in,out]itemsItems to exchange, converting between blocked and striped arrangements.
+
+
+ +
+
+ +
+
+
+template<typename T , int BLOCK_THREADS, int ITEMS_PER_THREAD>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >::StripedToBlocked (SmemStoragesmem_storage,
items[ITEMS_PER_THREAD] 
)
+
+inlinestatic
+
+ +

Transposes data items from striped arrangement to blocked arrangement.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in,out]itemsItems to exchange, converting between striped and blocked arrangements.
+
+
+ +
+
+ +
+
+
+template<typename T , int BLOCK_THREADS, int ITEMS_PER_THREAD>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >::ScatterToBlocked (SmemStoragesmem_storage,
items[ITEMS_PER_THREAD],
unsigned int ranks[ITEMS_PER_THREAD] 
)
+
+inlinestatic
+
+ +

Exchanges data items annotated by rank into blocked arrangement.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in,out]itemsItems to exchange
[in]ranksCorresponding scatter ranks
+
+
+ +
+
+ +
+
+
+template<typename T , int BLOCK_THREADS, int ITEMS_PER_THREAD>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >::ScatterToStriped (SmemStoragesmem_storage,
items[ITEMS_PER_THREAD],
unsigned int ranks[ITEMS_PER_THREAD] 
)
+
+inlinestatic
+
+ +

Exchanges data items annotated by rank into striped arrangement.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in,out]itemsItems to exchange
[in]ranksCorresponding scatter ranks
+
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + + diff --git a/docs/html/classcub_1_1_block_load-members.html b/docs/html/classcub_1_1_block_load-members.html new file mode 100644 index 0000000000..17dc1f739c --- /dev/null +++ b/docs/html/classcub_1_1_block_load-members.html @@ -0,0 +1,122 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER > Member List
+
+
+ +

This is the complete list of members for cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >, including all inherited members.

+ + + + +
Load(SmemStorage &smem_storage, InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >inlinestatic
Load(SmemStorage &smem_storage, InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >inlinestatic
SmemStorage typedefcub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >
+ + + + + diff --git a/docs/html/classcub_1_1_block_load.html b/docs/html/classcub_1_1_block_load.html new file mode 100644 index 0000000000..0251f4619e --- /dev/null +++ b/docs/html/classcub_1_1_block_load.html @@ -0,0 +1,338 @@ + + + + + + + +CUB: cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER > Class Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER > Class Template Reference
+
+
+

Detailed description

+

template<typename InputIterator, int BLOCK_THREADS, int ITEMS_PER_THREAD, BlockLoadPolicy POLICY = BLOCK_LOAD_DIRECT, PtxLoadModifier MODIFIER = PTX_LOAD_NONE>
+class cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >

+ +

BlockLoad provides data movement operations for reading block-arranged data from global memory.

+
+block_load_logo.png +
+.
+

BlockLoad provides a single tile-loading abstraction whose performance behavior can be statically tuned. In particular, BlockLoad implements alternative cub::BlockLoadPolicy strategies catering to different granularity sizes (i.e., number of items per thread).

+
Template Parameters
+ + + + + + +
InputIteratorThe input iterator type (may be a simple pointer type).
BLOCK_THREADSThe threadblock size in threads.
ITEMS_PER_THREADThe number of consecutive items partitioned onto each thread.
POLICY[optional] cub::BlockLoadPolicy tuning policy. Default = cub::BLOCK_LOAD_DIRECT.
MODIFIER[optional] cub::PtxLoadModifier cache modifier. Default = cub::PTX_LOAD_NONE.
+
+
+
Algorithm
BlockLoad can be (optionally) configured to use one of three alternative methods:
    +
  1. cub::BLOCK_LOAD_DIRECT. A blocked arrangement of data is read directly from memory. More...
  2. +
  3. cub::BLOCK_LOAD_VECTORIZE. A blocked arrangement of data is read directly from memory using CUDA's built-in vectorized loads as a coalescing optimization. More...
  4. +
  5. cub::BLOCK_LOAD_TRANSPOSE. A striped arrangement of data is read directly from memory and is then locally transposed into a blocked arrangement. More...
  6. +
+
+
Usage Considerations
    +
  • After any operation, a subsequent __syncthreads() barrier is required if the supplied BlockLoad::SmemStorage is to be reused or repurposed by the threadblock
  • +
+
+
Performance Considerations
+
+
Examples
Example 1. Have a 128-thread threadblock directly load a blocked arrangement of four consecutive integers per thread.
#include <cub.cuh>
+
+
__global__ void SomeKernel(int *d_in, ...)
+
{
+
// Parameterize BlockLoad for the parallel execution context
+
typedef cub::BlockLoad<int*, 128, 4> BlockLoad;
+
+
// Declare shared memory for BlockLoad
+
__shared__ typename BlockLoad::SmemStorage smem_storage;
+
+
// A segment of consecutive items per thread
+
int data[4];
+
+
// Load a tile of data at this block's offset
+
BlockLoad::Load(smem_storage, d_in + blockIdx.x * 128 * 4, data);
+
+
...
+
+
Example 2. Have a threadblock load a blocked arrangement of ITEMS_PER_THREAD consecutive integers per thread using vectorized loads and global-only caching:
#include <cub.cuh>
+
+
template <
+
int BLOCK_THREADS,
+
int ITEMS_PER_THREAD>
+
__global__ void SomeKernel(int *d_in, ...)
+
{
+
// Parameterize BlockLoad for the parallel execution context
+ +
+
// Declare shared memory for BlockLoad
+
__shared__ typename BlockLoad::SmemStorage smem_storage;
+
+
// A segment of consecutive items per thread
+
int data[ITEMS_PER_THREAD];
+
+
// Load a tile of data at this block's offset
+
BlockLoad::Load(smem_storage, d_in + blockIdx.x * BLOCK_THREADS * ITEMS_PER_THREAD, data);
+
+
...
+

+
+
+ + + + +

+Public Types

+typedef _SmemStorage SmemStorage
 The operations exposed by BlockLoad require shared memory of this type. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated shared memory or union'd with other types to facilitate shared memory reuse.
 
+ + + + + + + + +

+Static Public Methods

static __device__
+__forceinline__ void 
Load (SmemStorage &smem_storage, InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock. More...
 
template<typename SizeT >
static __device__
+__forceinline__ void 
Load (SmemStorage &smem_storage, InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock, guarded by range. More...
 
+

Member Function Documentation

+ +
+
+
+template<typename InputIterator , int BLOCK_THREADS, int ITEMS_PER_THREAD, BlockLoadPolicy POLICY = BLOCK_LOAD_DIRECT, PtxLoadModifier MODIFIER = PTX_LOAD_NONE>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >::Load (SmemStoragesmem_storage,
InputIterator block_itr,
T(&) items[ITEMS_PER_THREAD] 
)
+
+inlinestatic
+
+ +

Load a tile of items across a threadblock.

+
Parameters
+ + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]block_itrThe threadblock's base input iterator for loading from
[out]itemsData to load
+
+
+ +
+
+ +
+
+
+template<typename InputIterator , int BLOCK_THREADS, int ITEMS_PER_THREAD, BlockLoadPolicy POLICY = BLOCK_LOAD_DIRECT, PtxLoadModifier MODIFIER = PTX_LOAD_NONE>
+
+template<typename SizeT >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >::Load (SmemStoragesmem_storage,
InputIterator block_itr,
const SizeT & guarded_items,
T(&) items[ITEMS_PER_THREAD] 
)
+
+inlinestatic
+
+ +

Load a tile of items across a threadblock, guarded by range.

+
Template Parameters
+ + +
SizeT[inferred] Integer type for offsets
+
+
+
Parameters
+ + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]block_itrThe threadblock's base input iterator for loading from
[in]guarded_itemsNumber of valid items in the tile
[out]itemsData to load
+
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + + diff --git a/docs/html/classcub_1_1_block_radix_sort-members.html b/docs/html/classcub_1_1_block_radix_sort-members.html new file mode 100644 index 0000000000..90c56f20a2 --- /dev/null +++ b/docs/html/classcub_1_1_block_radix_sort-members.html @@ -0,0 +1,126 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG > Member List
+
+
+ +

This is the complete list of members for cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >, including all inherited members.

+ + + + + + + + +
SmemStorage typedefcub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >
SortBlocked(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >inlinestatic
SortBlocked(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >inlinestatic
SortBlockedToStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >inlinestatic
SortBlockedToStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >inlinestatic
SortStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >inlinestatic
SortStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >inlinestatic
+ + + + + diff --git a/docs/html/classcub_1_1_block_radix_sort.html b/docs/html/classcub_1_1_block_radix_sort.html new file mode 100644 index 0000000000..818ce8ce91 --- /dev/null +++ b/docs/html/classcub_1_1_block_radix_sort.html @@ -0,0 +1,629 @@ + + + + + + + +CUB: cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG > Class Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG > Class Template Reference
+
+
+

Detailed description

+

template<typename KeyType, int BLOCK_THREADS, int ITEMS_PER_THREAD, typename ValueType = NullType, int RADIX_BITS = 5, cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte>
+class cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >

+ +

BlockRadixSort provides variants of parallel radix sorting across a CUDA threadblock.

+
+sorting_logo.png +
+.
+
Overview
The radix sorting method relies upon a positional representation for keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, characters, etc.) specified from least-significant to most-significant. For a given input sequence of keys and a set of rules specifying a total ordering of the symbolic alphabet, the radix sorting method produces a lexicographic ordering of those keys.
+
BlockRadixSort can sort all of the built-in C++ numeric primitive types, e.g.: unsigned char, int, double, etc. Within each key, the implementation treats fixed-length bit-sequences of RADIX_BITS as radix digit places. Although the direct radix sorting method can only be applied to unsigned integral types, BlockRadixSort is able to sort signed and floating-point types via simple bit-wise transformations that ensure lexicographic key ordering.
+
For convenience, BlockRadixSort exposes a spectrum of entrypoints that differ by:
    +
  • Value association (keys-only vs. key-value-pairs)
  • +
  • Input/output data arrangements (combinations of blocked and striped arrangements)
  • +
+
+
Template Parameters
+ + + + + + + +
KeyTypeKey type
BLOCK_THREADSThe threadblock size in threads
ITEMS_PER_THREADThe number of items per thread
ValueType[optional] Value type (default: cub::NullType)
RADIX_BITS[optional] The number of radix bits per digit place (default: 5 bits)
SMEM_CONFIG[optional] Shared memory bank mode (default: cudaSharedMemBankSizeFourByte)
+
+
+
Usage Considerations
    +
  • After any sorting operation, a subsequent __syncthreads() barrier is required if the supplied BlockRadixSort::SmemStorage is to be reused or repurposed by the threadblock.
  • +
  • BlockRadixSort can only accommodate one associated tile of values. To "truck along" more than one tile of values, simply perform a key-value sort of the keys paired with a temporary value array that enumerates the key indices. The reordered indices can then be used as a gather-vector for exchanging other associated tile data through shared memory.
  • +
+
+
Performance Considerations
    +
  • The operations are most efficient (lowest instruction overhead) when:
      +
    • BLOCK_THREADS is a multiple of the architecture's warp size
    • +
    • KeyType is an unsigned integral type
    • +
    • Keys are partitioned across the threadblock in a blocked arrangement
    • +
    +
  • +
+
+
Algorithm
BlockRadixSort is based on the method presented by Merrill et al.[1]. The implementation has O(n) work complexity and iterates over digit places using rounds constructed of +
+
Examples
Example 1. Perform a radix sort over a tile of 512 integer keys that are partitioned in a blocked arrangement across a 128-thread threadblock (where each thread holds 4 keys).
#include <cub.cuh>
+
+
__global__ void SomeKernel(...)
+
{
+
// Parameterize BlockRadixSort for the parallel execution context
+ +
+
// Declare shared memory for BlockRadixSort
+
__shared__ typename BlockRadixSort::SmemStorage smem_storage;
+
+
// A segment of consecutive input items per thread
+
int keys[4];
+
+
// Obtain keys in blocked order
+
...
+
+
// Sort keys in ascending order
+
BlockRadixSort::SortBlocked(smem_storage, keys);
+
+
Example 2. Perform a key-value radix sort over the lower 20-bits of a tile of 32-bit integer keys paired with floating-point values. The data are partitioned in a striped arrangement across the threadblock.
#include <cub.cuh>
+
+
template <int BLOCK_THREADS, int ITEMS_PER_THREAD>
+
__global__ void SomeKernel(...)
+
{
+
// Parameterize BlockRadixSort for the parallel execution context
+ +
+
// Declare shared memory for BlockRadixSort
+
__shared__ typename BlockRadixSort::SmemStorage smem_storage;
+
+
// Input keys and values per thread (striped across the threadblock)
+
int keys[ITEMS_PER_THREAD];
+
float values[ITEMS_PER_THREAD];
+
+
// Obtain keys and values in striped order
+
...
+
+
// Sort pairs in ascending order (using only the lower 20 distinguishing key bits)
+
BlockRadixSort::SortStriped(smem_storage, keys, values, 0, 20);
+
}
+
+
+ + + + +

+Public Types

+typedef _SmemStorage SmemStorage
 The operations exposed by BlockRadixSort require shared memory of this type. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated shared memory or union'd with other types to facilitate shared memory reuse.
 
+ + + + + + + + + + + + + + + + + + + + + +

+Static Public Methods

Keys-only sorting
static __device__
+__forceinline__ void 
SortBlocked (SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)
 Performs a threadblock-wide radix sort over a blocked arrangement of keys. More...
 
static __device__
+__forceinline__ void 
SortBlockedToStriped (SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)
 Performs a radix sort across a blocked arrangement of keys, leaving them in a striped arrangement. More...
 
static __device__
+__forceinline__ void 
SortStriped (SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)
 Performs a radix sort across a striped arrangement of keys. More...
 
Key-value pair sorting
static __device__
+__forceinline__ void 
SortBlocked (SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)
 Performs a radix sort across a blocked arrangement of keys and values. More...
 
static __device__
+__forceinline__ void 
SortBlockedToStriped (SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)
 Performs a radix sort across a blocked arrangement of keys and values, leaving them in a striped arrangement. More...
 
static __device__
+__forceinline__ void 
SortStriped (SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)
 Performs a radix sort across a striped arrangement of keys and values. More...
 
+

Member Function Documentation

+ +
+
+
+template<typename KeyType , int BLOCK_THREADS, int ITEMS_PER_THREAD, typename ValueType = NullType, int RADIX_BITS = 5, cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >::SortBlocked (SmemStoragesmem_storage,
KeyType(&) keys[ITEMS_PER_THREAD],
unsigned int begin_bit = 0,
const unsigned int & end_bit = sizeof(KeyType) * 8 
)
+
+inlinestatic
+
+ +

Performs a threadblock-wide radix sort over a blocked arrangement of keys.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in,out]keysKeys to sort
[in]begin_bit[optional] The beginning (least-significant) bit index needed for key comparison
[in]end_bit[optional] The past-the-end (most-significant) bit index needed for key comparison
+
+
+ +
+
+ +
+
+
+template<typename KeyType , int BLOCK_THREADS, int ITEMS_PER_THREAD, typename ValueType = NullType, int RADIX_BITS = 5, cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >::SortBlockedToStriped (SmemStoragesmem_storage,
KeyType(&) keys[ITEMS_PER_THREAD],
unsigned int begin_bit = 0,
const unsigned int & end_bit = sizeof(KeyType) * 8 
)
+
+inlinestatic
+
+ +

Performs a radix sort across a blocked arrangement of keys, leaving them in a striped arrangement.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in,out]keysKeys to sort
[in]begin_bit[optional] The beginning (least-significant) bit index needed for key comparison
[in]end_bit[optional] The past-the-end (most-significant) bit index needed for key comparison
+
+
+ +
+
+ +
+
+
+template<typename KeyType , int BLOCK_THREADS, int ITEMS_PER_THREAD, typename ValueType = NullType, int RADIX_BITS = 5, cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >::SortStriped (SmemStoragesmem_storage,
KeyType(&) keys[ITEMS_PER_THREAD],
unsigned int begin_bit = 0,
const unsigned int & end_bit = sizeof(KeyType) * 8 
)
+
+inlinestatic
+
+ +

Performs a radix sort across a striped arrangement of keys.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in,out]keysKeys to sort
[in]begin_bit[optional] The beginning (least-significant) bit index needed for key comparison
[in]end_bit[optional] The past-the-end (most-significant) bit index needed for key comparison
+
+
+ +
+
+ +
+
+
+template<typename KeyType , int BLOCK_THREADS, int ITEMS_PER_THREAD, typename ValueType = NullType, int RADIX_BITS = 5, cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >::SortBlocked (SmemStoragesmem_storage,
KeyType(&) keys[ITEMS_PER_THREAD],
ValueType(&) values[ITEMS_PER_THREAD],
unsigned int begin_bit = 0,
const unsigned int & end_bit = sizeof(KeyType) * 8 
)
+
+inlinestatic
+
+ +

Performs a radix sort across a blocked arrangement of keys and values.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in,out]keysKeys to sort
[in,out]valuesValues to sort
[in]begin_bit[optional] The beginning (least-significant) bit index needed for key comparison
[in]end_bit[optional] The past-the-end (most-significant) bit index needed for key comparison
+
+
+ +
+
+ +
+
+
+template<typename KeyType , int BLOCK_THREADS, int ITEMS_PER_THREAD, typename ValueType = NullType, int RADIX_BITS = 5, cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >::SortBlockedToStriped (SmemStoragesmem_storage,
KeyType(&) keys[ITEMS_PER_THREAD],
ValueType(&) values[ITEMS_PER_THREAD],
unsigned int begin_bit = 0,
const unsigned int & end_bit = sizeof(KeyType) * 8 
)
+
+inlinestatic
+
+ +

Performs a radix sort across a blocked arrangement of keys and values, leaving them in a striped arrangement.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in,out]keysKeys to sort
[in,out]valuesValues to sort
[in]begin_bit[optional] The beginning (least-significant) bit index needed for key comparison
[in]end_bit[optional] The past-the-end (most-significant) bit index needed for key comparison
+
+
+ +
+
+ +
+
+
+template<typename KeyType , int BLOCK_THREADS, int ITEMS_PER_THREAD, typename ValueType = NullType, int RADIX_BITS = 5, cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >::SortStriped (SmemStoragesmem_storage,
KeyType(&) keys[ITEMS_PER_THREAD],
ValueType(&) values[ITEMS_PER_THREAD],
unsigned int begin_bit = 0,
const unsigned int & end_bit = sizeof(KeyType) * 8 
)
+
+inlinestatic
+
+ +

Performs a radix sort across a striped arrangement of keys and values.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in,out]keysKeys to sort
[in,out]valuesValues to sort
[in]begin_bit[optional] The beginning (least-significant) bit index needed for key comparison
[in]end_bit[optional] The past-the-end (most-significant) bit index needed for key comparison
+
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + + diff --git a/docs/html/classcub_1_1_block_reduce-members.html b/docs/html/classcub_1_1_block_reduce-members.html new file mode 100644 index 0000000000..a70842a582 --- /dev/null +++ b/docs/html/classcub_1_1_block_reduce-members.html @@ -0,0 +1,126 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::BlockReduce< T, BLOCK_THREADS > Member List
+
+
+ +

This is the complete list of members for cub::BlockReduce< T, BLOCK_THREADS >, including all inherited members.

+ + + + + + + + +
Reduce(SmemStorage &smem_storage, T input, ReductionOp reduction_op)cub::BlockReduce< T, BLOCK_THREADS >inlinestatic
Reduce(SmemStorage &smem_storage, T(&inputs)[ITEMS_PER_THREAD], ReductionOp reduction_op)cub::BlockReduce< T, BLOCK_THREADS >inlinestatic
Reduce(SmemStorage &smem_storage, T input, ReductionOp reduction_op, const unsigned int &valid_threads)cub::BlockReduce< T, BLOCK_THREADS >inlinestatic
SmemStorage typedefcub::BlockReduce< T, BLOCK_THREADS >
Sum(SmemStorage &smem_storage, T input)cub::BlockReduce< T, BLOCK_THREADS >inlinestatic
Sum(SmemStorage &smem_storage, T(&inputs)[ITEMS_PER_THREAD])cub::BlockReduce< T, BLOCK_THREADS >inlinestatic
Sum(SmemStorage &smem_storage, T input, const unsigned int &valid_threads)cub::BlockReduce< T, BLOCK_THREADS >inlinestatic
+ + + + + diff --git a/docs/html/classcub_1_1_block_reduce.html b/docs/html/classcub_1_1_block_reduce.html new file mode 100644 index 0000000000..9120d57919 --- /dev/null +++ b/docs/html/classcub_1_1_block_reduce.html @@ -0,0 +1,601 @@ + + + + + + + +CUB: cub::BlockReduce< T, BLOCK_THREADS > Class Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::BlockReduce< T, BLOCK_THREADS > Class Template Reference
+
+
+

Detailed description

+

template<typename T, int BLOCK_THREADS>
+class cub::BlockReduce< T, BLOCK_THREADS >

+ +

BlockReduce provides variants of parallel reduction across a CUDA threadblock.

+
+reduce_logo.png +
+.
+
Overview
A reduction (or fold) uses a binary combining operator to compute a single aggregate from a list of input elements.
+
For convenience, BlockReduce exposes a spectrum of entrypoints that differ by:
    +
  • Granularity (single vs. multiple items per thread)
  • +
  • Input (full tile vs. partial-tile having some undefined elements)
  • +
+
+
Template Parameters
+ + + +
TThe reduction input/output element type
BLOCK_THREADSThe threadblock size in threads
+
+
+
Algorithm
BlockReduce entrypoints have O(n) work complexity and are implemented in three phases:
    +
  1. Sequential reduction in registers (if threads contribute more than one input each). Each thread then places the partial reduction of its item(s) into shared memory.
  2. +
  3. A single-warp performs a raking upsweep across partial reductions shared each thread in the threadblock.
  4. +
  5. A warp-synchronous Kogge-Stone style reduction within the raking warp to produce the total aggregate.
    +block_reduce.png +
    +
    Data flow for a hypothetical 16-thread threadblock and 4-thread raking warp.
  6. +
+
+
Usage Considerations
    +
  • Supports non-commutative reduction operators
  • +
  • Supports partially-full threadblocks (i.e., the most-significant thread ranks having undefined values).
  • +
  • Assumes a blocked arrangement of elements across threads
  • +
  • The threadblock-wide scalar reduction output is only considered valid in thread0
  • +
  • After any operation, a subsequent __syncthreads() barrier is required if the supplied BlockReduce::SmemStorage is to be reused or repurposed by the threadblock
  • +
+
+
Performance Considerations
    +
  • Very efficient (only one synchronization barrier).
  • +
  • Zero bank conflicts for most types.
  • +
  • Computation is slightly more efficient (i.e., having lower instruction overhead) for:
      +
    • T is a built-in C++ primitive or CUDA vector type (e.g., short, int2, double, float2, etc.)
    • +
    • BLOCK_THREADS is a multiple of the architecture's warp size
    • +
    • Every thread has a valid input (i.e., full vs. partial-tiles)
    • +
    +
  • +
+
+
Examples
+
Example 1. Perform a simple reduction of 512 integer keys that are partitioned in a blocked arrangement across a 128-thread threadblock (where each thread holds 4 keys).
#include <cub.cuh>
+
+
__global__ void SomeKernel(...)
+
{
+
// Parameterize BlockReduce for the parallel execution context
+
typedef cub::BlockReduce<int, 128> BlockReduce;
+
+
// Declare shared memory for BlockReduce
+
__shared__ typename BlockReduce::SmemStorage smem_storage;
+
+
// A segment of consecutive input items per thread
+
int data[4];
+
+
// Obtain items in blocked order
+
...
+
+
// Compute the threadblock-wide sum for thread0
+
int aggregate = BlockReduce::Sum(smem_storage, data);
+
+
...
+
+
Example 2: Perform a guarded reduction of only num_elements keys that are partitioned in a partially-full blocked arrangement across BLOCK_THREADS threads.
#include <cub.cuh>
+
+
template <int BLOCK_THREADS>
+
__global__ void SomeKernel(..., int num_elements)
+
{
+
// Parameterize BlockReduce for use with BLOCK_THREADS threads on type int
typedef cub::BlockReduce<int, BLOCK_THREADS> BlockReduce;
+
// Declare shared memory for BlockReduce
+
__shared__ typename BlockReduce::SmemStorage smem_storage;
+
+
// Guarded load
+
int data;
+
if (threadIdx.x < num_elements) data = ...;
+
+
// Compute the threadblock-wide sum of valid elements in thread0
+
int aggregate = BlockReduce::Sum(smem_storage, data, num_elements);
+
+
...
+
+
+ + + + +

+Public Types

+typedef _SmemStorage SmemStorage
 The operations exposed by BlockReduce require shared memory of this type. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated shared memory or union'd with other types to facilitate shared memory reuse.
 
+ + + + + + + + + + + + + + + + + + + + + + + + + +

+Static Public Methods

Generic reductions
template<typename ReductionOp >
static __device__ __forceinline__ T Reduce (SmemStorage &smem_storage, T input, ReductionOp reduction_op)
 Computes a threadblock-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes one input element. More...
 
template<int ITEMS_PER_THREAD, typename ReductionOp >
static __device__ __forceinline__ T Reduce (SmemStorage &smem_storage, T(&inputs)[ITEMS_PER_THREAD], ReductionOp reduction_op)
 Computes a threadblock-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes an array of consecutive input elements. More...
 
template<typename ReductionOp >
static __device__ __forceinline__ T Reduce (SmemStorage &smem_storage, T input, ReductionOp reduction_op, const unsigned int &valid_threads)
 Computes a threadblock-wide reduction for thread0 using the specified binary reduction functor. The first valid_threads threads each contribute one input element. More...
 
Summation reductions
static __device__ __forceinline__ T Sum (SmemStorage &smem_storage, T input)
 Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes one input element. More...
 
template<int ITEMS_PER_THREAD>
static __device__ __forceinline__ T Sum (SmemStorage &smem_storage, T(&inputs)[ITEMS_PER_THREAD])
 Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes an array of consecutive input elements. More...
 
static __device__ __forceinline__ T Sum (SmemStorage &smem_storage, T input, const unsigned int &valid_threads)
 Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. The first valid_threads threads each contribute one input element. More...
 
+

Member Function Documentation

+ +
+
+
+template<typename T , int BLOCK_THREADS>
+
+template<typename ReductionOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ T cub::BlockReduce< T, BLOCK_THREADS >::Reduce (SmemStoragesmem_storage,
input,
ReductionOp reduction_op 
)
+
+inlinestatic
+
+ +

Computes a threadblock-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes one input element.

+

The return value is undefined in threads other than thread0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + +
ReductionOp[inferred] Binary reduction functor type (a model of Binary Function).
+
+
+
Parameters
+ + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input
[in]reduction_opBinary associative reduction functor
+
+
+ +
+
+ +
+
+
+template<typename T , int BLOCK_THREADS>
+
+template<int ITEMS_PER_THREAD, typename ReductionOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ T cub::BlockReduce< T, BLOCK_THREADS >::Reduce (SmemStoragesmem_storage,
T(&) inputs[ITEMS_PER_THREAD],
ReductionOp reduction_op 
)
+
+inlinestatic
+
+ +

Computes a threadblock-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes an array of consecutive input elements.

+

The return value is undefined in threads other than thread0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + + +
ITEMS_PER_THREAD[inferred] The number of consecutive items partitioned onto each thread.
ReductionOp[inferred] Binary reduction functor type (a model of Binary Function).
+
+
+
Parameters
+ + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputsCalling thread's input segment
[in]reduction_opBinary associative reduction functor
+
+
+ +
+
+ +
+
+
+template<typename T , int BLOCK_THREADS>
+
+template<typename ReductionOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ T cub::BlockReduce< T, BLOCK_THREADS >::Reduce (SmemStoragesmem_storage,
input,
ReductionOp reduction_op,
const unsigned int & valid_threads 
)
+
+inlinestatic
+
+ +

Computes a threadblock-wide reduction for thread0 using the specified binary reduction functor. The first valid_threads threads each contribute one input element.

+

The return value is undefined in threads other than thread0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + +
ReductionOp[inferred] Binary reduction functor type (a model of Binary Function).
+
+
+
Parameters
+ + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input
[in]reduction_opBinary associative reduction functor
[in]valid_threadsNumber of threads containing valid elements (may be less than BLOCK_THREADS)
+
+
+ +
+
+ +
+
+
+template<typename T , int BLOCK_THREADS>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ T cub::BlockReduce< T, BLOCK_THREADS >::Sum (SmemStoragesmem_storage,
input 
)
+
+inlinestatic
+
+ +

Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes one input element.

+

The return value is undefined in threads other than thread0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input
+
+
+ +
+
+ +
+
+
+template<typename T , int BLOCK_THREADS>
+
+template<int ITEMS_PER_THREAD>
+ + + + + +
+ + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ T cub::BlockReduce< T, BLOCK_THREADS >::Sum (SmemStoragesmem_storage,
T(&) inputs[ITEMS_PER_THREAD] 
)
+
+inlinestatic
+
+ +

Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes an array of consecutive input elements.

+

The return value is undefined in threads other than thread0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + +
ITEMS_PER_THREAD[inferred] The number of consecutive items partitioned onto each thread.
+
+
+
Parameters
+ + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputsCalling thread's input segment
+
+
+ +
+
+ +
+
+
+template<typename T , int BLOCK_THREADS>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ T cub::BlockReduce< T, BLOCK_THREADS >::Sum (SmemStoragesmem_storage,
input,
const unsigned int & valid_threads 
)
+
+inlinestatic
+
+ +

Computes a threadblock-wide reduction for thread0 using addition (+) as the reduction operator. The first valid_threads threads each contribute one input element.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+

The return value is undefined in threads other than thread0.

+
Parameters
+ + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input
[in]valid_threadsNumber of threads containing valid elements (may be less than BLOCK_THREADS)
+
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + + diff --git a/docs/html/classcub_1_1_block_scan-members.html b/docs/html/classcub_1_1_block_scan-members.html new file mode 100644 index 0000000000..7d5fce93ae --- /dev/null +++ b/docs/html/classcub_1_1_block_scan-members.html @@ -0,0 +1,150 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::BlockScan< T, BLOCK_THREADS, POLICY > Member List
+
+
+ +

This is the complete list of members for cub::BlockScan< T, BLOCK_THREADS, POLICY >, including all inherited members.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op, T &block_aggregate)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], const T &identity, ScanOp scan_op, T &block_aggregate)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, T identity, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T identity, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, T identity, ScanOp scan_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], const T &identity, ScanOp scan_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate, BlockPrefixOp &block_prefix_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate, BlockPrefixOp &block_prefix_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveSum(SmemStorage &smem_storage, T input, T &output)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
ExclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD])cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate, BlockPrefixOp &block_prefix_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate, BlockPrefixOp &block_prefix_op)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveSum(SmemStorage &smem_storage, T input, T &output)cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
InclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD])cub::BlockScan< T, BLOCK_THREADS, POLICY >inlinestatic
SmemStorage typedefcub::BlockScan< T, BLOCK_THREADS, POLICY >
+ + + + + diff --git a/docs/html/classcub_1_1_block_scan.html.REMOVED.git-id b/docs/html/classcub_1_1_block_scan.html.REMOVED.git-id new file mode 100644 index 0000000000..71a6a74ff8 --- /dev/null +++ b/docs/html/classcub_1_1_block_scan.html.REMOVED.git-id @@ -0,0 +1 @@ +9610aa94f92ca87a6e60a940472ee60606ac256b \ No newline at end of file diff --git a/docs/html/classcub_1_1_block_store-members.html b/docs/html/classcub_1_1_block_store-members.html new file mode 100644 index 0000000000..f50b2f7688 --- /dev/null +++ b/docs/html/classcub_1_1_block_store-members.html @@ -0,0 +1,122 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER > Member List
+
+
+ +

This is the complete list of members for cub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >, including all inherited members.

+ + + + +
SmemStorage typedefcub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >
Store(SmemStorage &smem_storage, OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])cub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >inlinestatic
Store(SmemStorage &smem_storage, OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])cub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >inlinestatic
+ + + + + diff --git a/docs/html/classcub_1_1_block_store.html b/docs/html/classcub_1_1_block_store.html new file mode 100644 index 0000000000..ae1aac66b3 --- /dev/null +++ b/docs/html/classcub_1_1_block_store.html @@ -0,0 +1,342 @@ + + + + + + + +CUB: cub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER > Class Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER > Class Template Reference
+
+
+

Detailed description

+

template<typename OutputIterator, int BLOCK_THREADS, int ITEMS_PER_THREAD, BlockStorePolicy POLICY = BLOCK_STORE_DIRECT, PtxStoreModifier MODIFIER = PTX_STORE_NONE>
+class cub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >

+ +

BlockStore provides data movement operations for writing blocked-arranged data to global memory.

+
+block_store_logo.png +
+.
+

BlockStore provides a single tile-storing abstraction whose performance behavior can be statically tuned. In particular, BlockStore implements several alternative cub::BlockStorePolicy strategies catering to different granularity sizes (i.e., number of items per thread).

+
Template Parameters
+ + + + + + +
OutputIteratorThe output iterator type (may be a simple pointer type).
BLOCK_THREADSThe threadblock size in threads.
ITEMS_PER_THREADThe number of consecutive items partitioned onto each thread.
POLICY[optional] cub::BlockStorePolicy tuning policy enumeration. Default = cub::BLOCK_STORE_DIRECT.
MODIFIER[optional] cub::PtxStoreModifier cache modifier. Default = cub::PTX_STORE_NONE.
+
+
+
Algorithm
BlockStore can be (optionally) configured to use one of three alternative methods:
    +
  1. cub::BLOCK_STORE_DIRECT. A blocked arrangement of data is written directly to memory. More...
  2. +
  3. cub::BLOCK_STORE_VECTORIZE. A blocked arrangement of data is written directly to memory using CUDA's built-in vectorized stores as a coalescing optimization. More...
  4. +
  5. cub::BLOCK_STORE_TRANSPOSE. A blocked arrangement is locally transposed into a striped arrangement which is then written to memory. More...
  6. +
+
+
Usage Considerations
    +
  • After any operation, a subsequent __syncthreads() barrier is required if the supplied BlockStore::SmemStorage is to be reused or repurposed by the threadblock
  • +
+
+
Performance Considerations
+
+
Examples
Example 1. Have a 128-thread threadblock directly store a blocked arrangement of four consecutive integers per thread.
#include <cub.cuh>
+
+
template <int BLOCK_THREADS>
+
__global__ void SomeKernel(int *d_out, ...)
+
{
+
// Parameterize BlockStore for the parallel execution context
+
typedef cub::BlockStore<int*, 128, 4> BlockStore;
+
+
// Declare shared memory for BlockStore
+
__shared__ typename BlockStore::SmemStorage smem_storage;
+
+
// A segment of consecutive items per thread
+
int data[4];
+
+
// Store a tile of data
+
BlockStore::Store(smem_storage, d_out + blockIdx.x * 128 * 4, data);
+
+
...
+
}
+
+

Example 2. Have a threadblock store a blocked arrangement of ITEMS_PER_THREAD consecutive integers per thread using vectorized stores and global-only caching:

+
#include <cub.cuh>
+
+
template <int BLOCK_THREADS>
+
__global__ void SomeKernel(int *d_out, ...)
+
{
+
const int ITEMS_PER_THREAD = 4;
+
+
// Parameterize BlockStore for the parallel execution context
typedef cub::BlockStore<int*, BLOCK_THREADS, ITEMS_PER_THREAD, cub::BLOCK_STORE_VECTORIZE, cub::PTX_STORE_CG> BlockStore;
+
// Declare shared memory for BlockStore
+
__shared__ typename BlockStore::SmemStorage smem_storage;
+
+
// A segment of consecutive items per thread
+
int data[4];
+
+
// Store a tile of data using vector-store instructions if possible
+
BlockStore::Store(smem_storage, d_out + blockIdx.x * BLOCK_THREADS * 4, data);
+
+
...
+
}
+


+

+
+ + + + +

+Public Types

+typedef _SmemStorage SmemStorage
 The operations exposed by BlockStore require shared memory of this type. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated shared memory or union'd with other types to facilitate shared memory reuse.
 
+ + + + + + + + +

+Static Public Methods

static __device__
+__forceinline__ void 
Store (SmemStorage &smem_storage, OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock. More...
 
template<typename SizeT >
static __device__
+__forceinline__ void 
Store (SmemStorage &smem_storage, OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock, guarded by range. More...
 
+

Member Function Documentation

+ +
+
+
+template<typename OutputIterator , int BLOCK_THREADS, int ITEMS_PER_THREAD, BlockStorePolicy POLICY = BLOCK_STORE_DIRECT, PtxStoreModifier MODIFIER = PTX_STORE_NONE>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >::Store (SmemStoragesmem_storage,
OutputIterator block_itr,
T(&) items[ITEMS_PER_THREAD] 
)
+
+inlinestatic
+
+ +

Store a tile of items across a threadblock.

+
Parameters
+ + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]block_itrThe threadblock's base output iterator for storing to
[in]itemsData to store
+
+
+ +
+
+ +
+
+
+template<typename OutputIterator , int BLOCK_THREADS, int ITEMS_PER_THREAD, BlockStorePolicy POLICY = BLOCK_STORE_DIRECT, PtxStoreModifier MODIFIER = PTX_STORE_NONE>
+
+template<typename SizeT >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >::Store (SmemStoragesmem_storage,
OutputIterator block_itr,
const SizeT & guarded_items,
T(&) items[ITEMS_PER_THREAD] 
)
+
+inlinestatic
+
+ +

Store a tile of items across a threadblock, guarded by range.

+
Template Parameters
+ + +
SizeT[inferred] Integer type for offsets
+
+
+
Parameters
+ + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]block_itrThe threadblock's base output iterator for storing to
[in]guarded_itemsNumber of valid items in the tile
[in]itemsData to store
+
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + + diff --git a/docs/html/classcub_1_1_warp_scan-members.html b/docs/html/classcub_1_1_warp_scan-members.html new file mode 100644 index 0000000000..e1dfdcb2ad --- /dev/null +++ b/docs/html/classcub_1_1_warp_scan-members.html @@ -0,0 +1,135 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS > Member List
+
+
+ +

This is the complete list of members for cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >, including all inherited members.

+ + + + + + + + + + + + + + + + + +
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op, T &warp_aggregate)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
ExclusiveSum(SmemStorage &smem_storage, T input, T &output)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
InclusiveSum(SmemStorage &smem_storage, T input, T &output)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >inlinestatic
SmemStorage typedefcub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >
+ + + + + diff --git a/docs/html/classcub_1_1_warp_scan.html b/docs/html/classcub_1_1_warp_scan.html new file mode 100644 index 0000000000..0d2c0b9f55 --- /dev/null +++ b/docs/html/classcub_1_1_warp_scan.html @@ -0,0 +1,1430 @@ + + + + + + + +CUB: cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS > Class Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS > Class Template Reference
+
+
+

Detailed description

+

template<typename T, int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+class cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >

+ +

WarpScan provides variants of parallel prefix scan across a CUDA warp.

+
+warp_scan_logo.png +
+.
+
Overview
Given a list of input elements and a binary reduction operator, a prefix scan produces an output list where each element is computed to be the reduction of the elements occurring earlier in the input list. Prefix sum connotes a prefix scan with the addition operator. The term inclusive indicates that the ith output reduction includes the ith input. The term exclusive indicates the ith input is not computed into the ith output reduction.
+
For convenience, WarpScan exposes a spectrum of entrypoints that differ by:
    +
  • Operator (generic scan vs. prefix sum for numeric types)
  • +
  • Output ordering (inclusive vs. exclusive)
  • +
  • Warp-wide prefix (identity vs. call-back functor)
  • +
  • Output (scanned elements only vs. scanned elements and the total aggregate)
  • +
+
+
Template Parameters
+ + + + +
TThe scan input/output element type
WARPSThe number of "logical" warps performing concurrent warp scans
LOGICAL_WARP_THREADS[optional] The number of threads per "logical" warp (may be less than the number of hardware warp threads). Default is the warp size associated with the CUDA Compute Capability targeted by the compiler (e.g., 32 warps for SM20).
+
+
+
Usage Considerations
    +
  • Supports non-commutative scan operators
  • +
  • Supports "logical" warps smaller than the physical warp size (e.g., a logical warp of 8 threads)
  • +
  • Warp scans are concurrent if more than one warp is participating
  • +
  • Any warp-wide scalar inputs and outputs (e.g., warp_prefix_op and warp_aggregate) are only considered valid in lane0
  • +
  • After any operation, a subsequent __syncthreads() barrier is required if the supplied WarpScan::SmemStorage is to be reused or repurposed by the threadblock
  • +
+
+
Performance Considerations
    +
  • Uses special instructions when applicable (e.g., warp SHFL)
  • +
  • Uses synchronization-free communication between warp lanes when applicable
  • +
  • Zero bank conflicts for most types.
  • +
  • Computation is slightly more efficient (i.e., having lower instruction overhead) for:
      +
    • Prefix sum variants (vs. generic scan)
    • +
    • Exclusive variants (vs. inclusive)
    • +
    • Basic scan variants that don't require scalar inputs and outputs (e.g., warp_prefix_op and warp_aggregate)
    • +
    • Scan parameterizations where T is a built-in C++ primitive or CUDA vector type (e.g., short, int2, double, float2, etc.)
    • +
    • Scan parameterizations where LOGICAL_WARP_THREADS is a multiple of the architecture's warp size
    • +
    +
  • +
+
+
Algorithm
These parallel prefix scan variants implement a warp-synchronous Kogge-Stone algorithm having O(logn) steps and O(nlogn) work complexity, where n = LOGICAL_WARP_THREADS (which defaults to the warp size associated with the CUDA Compute Capability targeted by the compiler).
+
+
+kogge_stone_scan.png +
+
Data flow within a 16-thread Kogge-Stone scan construction. Junctions represent binary operators.

+
+
Examples
Example 1. Perform a simple exclusive prefix sum for one warp
#include <cub.cuh>
+
+
__global__ void SomeKernel(...)
+
{
+
// A parameterized int-based WarpScan type for use with one warp.
+
typedef cub::WarpScan<int, 1> WarpScan;
+
+
// Opaque shared memory for WarpScan
+
__shared__ typename WarpScan::SmemStorage smem_storage;
+
+
// Perform prefix sum of threadIds in first warp
+
if (threadIdx.x < 32)
+
{
+
int input = threadIdx.x;
+
int output;
+
WarpScan::ExclusiveSum(smem_storage, input, output);
+
+
printf("tid(%d) output(%d)\n\n", threadIdx.x, output);
+
}
+
Printed output:
tid(0) output(0)
+
tid(1) output(0)
+
tid(2) output(1)
+
tid(3) output(3)
+
tid(4) output(6)
+
...
+
tid(31) output(465)
+
+
Example 2. Perform an exclusive prefix sum for one warp seeded with a warp-wide prefix
#include <cub.cuh>
+
+
struct WarpPrefixOp
+
{
+
int warp_prefix;
+
+
__device__ WarpPrefixOp(int warp_prefix) : warp_prefix(warp_prefix) {}
+
+
__device__ int operator()(int warp_aggregate)
+
{
+
int old_prefix = warp_prefix;
+
warp_prefix += warp_aggregate;
+
return old_prefix;
+
}
+
};
+
+
__global__ void SomeKernel(...)
+
{
+
// A parameterized int-based WarpScan type for use with one warp.
+
typedef cub::WarpScan<int, 1> WarpScan;
+
+
// Opaque shared memory for WarpScan
+
__shared__ typename WarpScan::SmemStorage smem_storage;
+
+
// Perform prefix sum of 2s, all seeded with a warp prefix value of 10
+
if (threadIdx.x < 32)
+
{
+
int input = 2;
+
int output;
+
int warp_aggregate;
+
WarpPrefixOp warp_prefix_op(10);
+
WarpScan::ExclusiveSum(smem_storage, input, output,
+
warp_aggregate, warp_prefix_op);
+
+
printf("tid(%d) output(%d)\n\n", threadIdx.x, output);
+
if (threadIdx.x == 0)
+
printf("updated aggregate(%d) and warp_prefix(%d)\n",
+
warp_aggregate, warp_prefix_op.warp_prefix);
+
}
+
Printed output:
tid(0) output(10)
+
tid(1) output(12)
+
tid(2) output(14)
+
tid(3) output(16)
+
tid(4) output(18)
+
...
+
tid(31) output(72)
+
+
updated aggregate(64) and warp_prefix(74)
+
+
+ + + + +

+Public Types

+typedef _SmemStorage SmemStorage
 The operations exposed by WarpScan require shared memory of this type. This opaque storage can be allocated directly using the __shared__ keyword. Alternatively, it can be aliased to externally allocated shared memory or union'd with other types to facilitate shared memory reuse.
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Static Public Methods

Inclusive prefix sums
static __device__
+__forceinline__ void 
InclusiveSum (SmemStorage &smem_storage, T input, T &output)
 Computes an inclusive prefix sum in each logical warp. More...
 
static __device__
+__forceinline__ void 
InclusiveSum (SmemStorage &smem_storage, T input, T &output, T &warp_aggregate)
 Computes an inclusive prefix sum in each logical warp. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. More...
 
template<typename WarpPrefixOp >
static __device__
+__forceinline__ void 
InclusiveSum (SmemStorage &smem_storage, T input, T &output, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)
 Computes an inclusive prefix sum in each logical warp. Instead of using 0 as the warp-wide prefix, the call-back functor warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. The warp_prefix_op is further updated by the value of warp_aggregate. More...
 
Exclusive prefix sums
static __device__
+__forceinline__ void 
ExclusiveSum (SmemStorage &smem_storage, T input, T &output)
 Computes an exclusive prefix sum in each logical warp. More...
 
static __device__
+__forceinline__ void 
ExclusiveSum (SmemStorage &smem_storage, T input, T &output, T &warp_aggregate)
 Computes an exclusive prefix sum in each logical warp. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. More...
 
template<typename WarpPrefixOp >
static __device__
+__forceinline__ void 
ExclusiveSum (SmemStorage &smem_storage, T input, T &output, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)
 Computes an exclusive prefix sum in each logical warp. Instead of using 0 as the warp-wide prefix, the call-back functor warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. The warp_prefix_op is further updated by the value of warp_aggregate. More...
 
Inclusive prefix scans
template<typename ScanOp >
static __device__
+__forceinline__ void 
InclusiveScan (SmemStorage &smem_storage, T input, T &output, ScanOp scan_op)
 Computes an inclusive prefix sum using the specified binary scan functor in each logical warp. More...
 
template<typename ScanOp >
static __device__
+__forceinline__ void 
InclusiveScan (SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate)
 Computes an inclusive prefix sum using the specified binary scan functor in each logical warp. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. More...
 
template<typename ScanOp , typename WarpPrefixOp >
static __device__
+__forceinline__ void 
InclusiveScan (SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)
 Computes an inclusive prefix sum using the specified binary scan functor in each logical warp. The call-back functor warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. The warp_prefix_op is further updated by the value of warp_aggregate. More...
 
Exclusive prefix scans
template<typename ScanOp >
static __device__
+__forceinline__ void 
ExclusiveScan (SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op)
 Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. More...
 
template<typename ScanOp >
static __device__
+__forceinline__ void 
ExclusiveScan (SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op, T &warp_aggregate)
 Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. More...
 
template<typename ScanOp , typename WarpPrefixOp >
static __device__
+__forceinline__ void 
ExclusiveScan (SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)
 Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. The call-back functor warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. The warp_prefix_op is further updated by the value of warp_aggregate. More...
 
Exclusive prefix scans (without supplied identity)
template<typename ScanOp >
static __device__
+__forceinline__ void 
ExclusiveScan (SmemStorage &smem_storage, T input, T &output, ScanOp scan_op)
 Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. Because no identity value is supplied, the output computed for thread-lane0 is invalid. More...
 
template<typename ScanOp >
static __device__
+__forceinline__ void 
ExclusiveScan (SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate)
 Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. Because no identity value is supplied, the output computed for thread-lane0 is invalid. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. More...
 
template<typename ScanOp , typename WarpPrefixOp >
static __device__
+__forceinline__ void 
ExclusiveScan (SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)
 Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. The warp_prefix_op value from thread-thread-lane0 is applied to all scan outputs. Also computes the warp-wide warp_aggregate of all inputs for thread-thread-lane0. The warp_prefix_op is further updated by the value of warp_aggregate. More...
 
+

Member Function Documentation

+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::InclusiveSum (SmemStoragesmem_storage,
input,
T & output 
)
+
+inlinestatic
+
+ +

Computes an inclusive prefix sum in each logical warp.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::InclusiveSum (SmemStoragesmem_storage,
input,
T & output,
T & warp_aggregate 
)
+
+inlinestatic
+
+ +

Computes an inclusive prefix sum in each logical warp. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0.

+

The warp_aggregate is undefined in threads other than thread-lane0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[out]warp_aggregate[warp-lane0 only] Warp-wide aggregate reduction of input items.
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+
+template<typename WarpPrefixOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::InclusiveSum (SmemStoragesmem_storage,
input,
T & output,
T & warp_aggregate,
WarpPrefixOp & warp_prefix_op 
)
+
+inlinestatic
+
+ +

Computes an inclusive prefix sum in each logical warp. Instead of using 0 as the warp-wide prefix, the call-back functor warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. The warp_prefix_op is further updated by the value of warp_aggregate.

+

The warp_aggregate is undefined in threads other than thread-lane0.

+

The warp_prefix_op functor must implement a member function T operator()(T warp_aggregate). The functor's input parameter warp_aggregate is the same value also returned by the scan operation. This functor is expected to return a warp-wide prefix to be applied to all inputs. The functor will be invoked by the entire warp of threads, however the input and output are undefined in threads other than warp-lane0. Can be stateful.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + +
WarpPrefixOp[inferred] Call-back functor type having member T operator()(T warp_aggregate)
+
+
+
Parameters
+ + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[out]warp_aggregate[warp-lane0 only] Warp-wide aggregate reduction of input items, exclusive of the warp_prefix_op value
[in,out]warp_prefix_op[warp-lane0 only] Call-back functor for specifying a warp-wide prefix to be applied to all inputs.
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::ExclusiveSum (SmemStoragesmem_storage,
input,
T & output 
)
+
+inlinestatic
+
+ +

Computes an exclusive prefix sum in each logical warp.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::ExclusiveSum (SmemStoragesmem_storage,
input,
T & output,
T & warp_aggregate 
)
+
+inlinestatic
+
+ +

Computes an exclusive prefix sum in each logical warp. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0.

+

The warp_aggregate is undefined in threads other than thread-lane0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Parameters
+ + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[out]warp_aggregate[warp-lane0 only] Warp-wide aggregate reduction of input items.
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+
+template<typename WarpPrefixOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::ExclusiveSum (SmemStoragesmem_storage,
input,
T & output,
T & warp_aggregate,
WarpPrefixOp & warp_prefix_op 
)
+
+inlinestatic
+
+ +

Computes an exclusive prefix sum in each logical warp. Instead of using 0 as the warp-wide prefix, the call-back functor warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. The warp_prefix_op is further updated by the value of warp_aggregate.

+

The warp_aggregate is undefined in threads other than thread-lane0.

+

The warp_prefix_op functor must implement a member function T operator()(T warp_aggregate). The functor's input parameter warp_aggregate is the same value also returned by the scan operation. This functor is expected to return a warp-wide prefix to be applied to all inputs. The functor will be invoked by the entire warp of threads, however the input and output are undefined in threads other than warp-lane0. Can be stateful.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + +
WarpPrefixOp[inferred] Call-back functor type having member T operator()(T warp_aggregate)
+
+
+
Parameters
+ + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[out]warp_aggregate[warp-lane0 only] Warp-wide aggregate reduction of input items (exclusive of the warp_prefix_op value).
[in,out]warp_prefix_op[warp-lane0 only] Call-back functor for specifying a warp-wide prefix to be applied to all inputs.
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+
+template<typename ScanOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::InclusiveScan (SmemStoragesmem_storage,
input,
T & output,
ScanOp scan_op 
)
+
+inlinestatic
+
+ +

Computes an inclusive prefix sum using the specified binary scan functor in each logical warp.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + +
ScanOp[inferred] Binary scan operator type having member T operator()(const T &a, const T &b)
+
+
+
Parameters
+ + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[in]scan_opBinary scan operator having member T operator()(const T &a, const T &b)
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+
+template<typename ScanOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::InclusiveScan (SmemStoragesmem_storage,
input,
T & output,
ScanOp scan_op,
T & warp_aggregate 
)
+
+inlinestatic
+
+ +

Computes an inclusive prefix sum using the specified binary scan functor in each logical warp. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0.

+

The warp_aggregate is undefined in threads other than thread-lane0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + +
ScanOp[inferred] Binary scan operator type having member T operator()(const T &a, const T &b)
+
+
+
Parameters
+ + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[in]scan_opBinary scan operator having member T operator()(const T &a, const T &b)
[out]warp_aggregate[warp-lane0 only] Warp-wide aggregate reduction of input items.
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+
+template<typename ScanOp , typename WarpPrefixOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::InclusiveScan (SmemStoragesmem_storage,
input,
T & output,
ScanOp scan_op,
T & warp_aggregate,
WarpPrefixOp & warp_prefix_op 
)
+
+inlinestatic
+
+ +

Computes an inclusive prefix sum using the specified binary scan functor in each logical warp. The call-back functor warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. The warp_prefix_op is further updated by the value of warp_aggregate.

+

The warp_aggregate is undefined in threads other than thread-lane0.

+

The warp_prefix_op functor must implement a member function T operator()(T warp_aggregate). The functor's input parameter warp_aggregate is the same value also returned by the scan operation. This functor is expected to return a warp-wide prefix to be applied to all inputs. The functor will be invoked by the entire warp of threads, however the input and output are undefined in threads other than warp-lane0. Can be stateful.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + + +
ScanOp[inferred] Binary scan operator type having member T operator()(const T &a, const T &b)
WarpPrefixOp[inferred] Call-back functor type having member T operator()(T warp_aggregate)
+
+
+
Parameters
+ + + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[in]scan_opBinary scan operator having member T operator()(const T &a, const T &b)
[out]warp_aggregate[warp-lane0 only] Warp-wide aggregate reduction of input items (exclusive of the warp_prefix_op value).
[in,out]warp_prefix_op[warp-lane0 only] Call-back functor for specifying a warp-wide prefix to be applied to all inputs.
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+
+template<typename ScanOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::ExclusiveScan (SmemStoragesmem_storage,
input,
T & output,
const T & identity,
ScanOp scan_op 
)
+
+inlinestatic
+
+ +

Computes an exclusive prefix scan using the specified binary scan functor in each logical warp.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + +
ScanOp[inferred] Binary scan operator type having member T operator()(const T &a, const T &b)
+
+
+
Parameters
+ + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[in]identityIdentity value
[in]scan_opBinary scan operator having member T operator()(const T &a, const T &b)
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+
+template<typename ScanOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::ExclusiveScan (SmemStoragesmem_storage,
input,
T & output,
const T & identity,
ScanOp scan_op,
T & warp_aggregate 
)
+
+inlinestatic
+
+ +

Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0.

+

The warp_aggregate is undefined in threads other than thread-lane0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + +
ScanOp[inferred] Binary scan operator type having member T operator()(const T &a, const T &b)
+
+
+
Parameters
+ + + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[in]identityIdentity value
[in]scan_opBinary scan operator having member T operator()(const T &a, const T &b)
[out]warp_aggregate[warp-lane0 only] Warp-wide aggregate reduction of input items.
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+
+template<typename ScanOp , typename WarpPrefixOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::ExclusiveScan (SmemStoragesmem_storage,
input,
T & output,
const T & identity,
ScanOp scan_op,
T & warp_aggregate,
WarpPrefixOp & warp_prefix_op 
)
+
+inlinestatic
+
+ +

Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. The call-back functor warp_prefix_op is invoked to provide the "seed" value that logically prefixes the warp's scan inputs. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0. The warp_prefix_op is further updated by the value of warp_aggregate.

+

The warp_aggregate is undefined in threads other than thread-lane0.

+

The warp_prefix_op functor must implement a member function T operator()(T warp_aggregate). The functor's input parameter warp_aggregate is the same value also returned by the scan operation. This functor is expected to return a warp-wide prefix to be applied to all inputs. The functor will be invoked by the entire warp of threads, however the input and output are undefined in threads other than warp-lane0. Can be stateful.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + + +
ScanOp[inferred] Binary scan operator type having member T operator()(const T &a, const T &b)
WarpPrefixOp[inferred] Call-back functor type having member T operator()(T warp_aggregate)
+
+
+
Parameters
+ + + + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[in]identityIdentity value
[in]scan_opBinary scan operator having member T operator()(const T &a, const T &b)
[out]warp_aggregate[warp-lane0 only] Warp-wide aggregate reduction of input items (exclusive of the warp_prefix_op value).
[in,out]warp_prefix_op[warp-lane0 only] Call-back functor for specifying a warp-wide prefix to be applied to all inputs.
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+
+template<typename ScanOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::ExclusiveScan (SmemStoragesmem_storage,
input,
T & output,
ScanOp scan_op 
)
+
+inlinestatic
+
+ +

Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. Because no identity value is supplied, the output computed for thread-lane0 is invalid.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + +
ScanOp[inferred] Binary scan operator type having member T operator()(const T &a, const T &b)
+
+
+
Parameters
+ + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[in]scan_opBinary scan operator having member T operator()(const T &a, const T &b)
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+
+template<typename ScanOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::ExclusiveScan (SmemStoragesmem_storage,
input,
T & output,
ScanOp scan_op,
T & warp_aggregate 
)
+
+inlinestatic
+
+ +

Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. Because no identity value is supplied, the output computed for thread-lane0 is invalid. Also computes the warp-wide warp_aggregate of all inputs for thread-lane0.

+

The warp_aggregate is undefined in threads other than thread-lane0.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + +
ScanOp[inferred] Binary scan operator type having member T operator()(const T &a, const T &b)
+
+
+
Parameters
+ + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[in]scan_opBinary scan operator having member T operator()(const T &a, const T &b)
[out]warp_aggregate[warp-lane0 only] Warp-wide aggregate reduction of input items.
+
+
+ +
+
+ +
+
+
+template<typename T , int WARPS, int LOGICAL_WARP_THREADS = DeviceProps::WARP_THREADS>
+
+template<typename ScanOp , typename WarpPrefixOp >
+ + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
static __device__ __forceinline__ void cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >::ExclusiveScan (SmemStoragesmem_storage,
input,
T & output,
ScanOp scan_op,
T & warp_aggregate,
WarpPrefixOp & warp_prefix_op 
)
+
+inlinestatic
+
+ +

Computes an exclusive prefix scan using the specified binary scan functor in each logical warp. The warp_prefix_op value from thread-thread-lane0 is applied to all scan outputs. Also computes the warp-wide warp_aggregate of all inputs for thread-thread-lane0. The warp_prefix_op is further updated by the value of warp_aggregate.

+

The warp_aggregate is undefined in threads other than thread-lane0.

+

The warp_prefix_op functor must implement a member function T operator()(T warp_aggregate). The functor's input parameter warp_aggregate is the same value also returned by the scan operation. This functor is expected to return a warp-wide prefix to be applied to all inputs. The functor will be invoked by the entire warp of threads, however the input and output are undefined in threads other than warp-lane0. Can be stateful.

+

A subsequent __syncthreads() threadblock barrier should be invoked after calling this method if the supplied smem_storage is to be reused or repurposed by the threadblock.

+
Template Parameters
+ + + +
ScanOp[inferred] Binary scan operator type having member T operator()(const T &a, const T &b)
WarpPrefixOp[inferred] Call-back functor type having member T operator()(T warp_aggregate)
+
+
+
Parameters
+ + + + + + + +
[in]smem_storageShared reference to opaque SmemStorage layout
[in]inputCalling thread's input item.
[out]outputCalling thread's output item. May be aliased with input.
[in]scan_opBinary scan operator having member T operator()(const T &a, const T &b)
[out]warp_aggregate[warp-lane0 only] Warp-wide aggregate reduction of input items (exclusive of the warp_prefix_op value).
[in,out]warp_prefix_op[warp-lane0 only] Call-back functor for specifying a warp-wide prefix to be applied to all inputs.
+
+
+ +
+
+
The documentation for this class was generated from the following file: +
+ + + + + diff --git a/docs/html/classes.html b/docs/html/classes.html new file mode 100644 index 0000000000..4281200122 --- /dev/null +++ b/docs/html/classes.html @@ -0,0 +1,138 @@ + + + + + + + +CUB: Class Index + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + +
+ + + + +
+ +
+ +
+
+
Class Index
+
+
+
A | B | E | I | L | M | N | R | S | T | W
+ + + + + + + + + + + + +
  A  
+
BlockRadixSort (cub)   
  I  
+
  N  
+
  T  
+
BlockReduce (cub)   
ArrayTraits (cub)   BlockScan (cub)   If (cub)   NullType (cub)   Traits (cub)   
  B  
+
BlockStore (cub)   IsVolatile (cub)   NumericTraits (cub)   
  W  
+
  E  
+
  L  
+
  R  
+
BaseTraits (cub)   WarpScan (cub)   
BlockDiscontinuity (cub)   EnableIf (cub)   Log2 (cub)   RemoveQualifiers (cub)   
BlockExchange (cub)   Equality (cub)   
  M  
+
  S  
+
BlockLoad (cub)   Equals (cub)   
Max (cub)   Sum (cub)   
+
A | B | E | I | L | M | N | R | S | T | W
+
+ + + + + diff --git a/docs/html/closed.png b/docs/html/closed.png new file mode 100644 index 0000000000..98cc2c909d Binary files /dev/null and b/docs/html/closed.png differ diff --git a/cub/docs/images/cub_overview.png b/docs/html/cub_overview.png similarity index 100% rename from cub/docs/images/cub_overview.png rename to docs/html/cub_overview.png diff --git a/docs/html/debug_8cuh.html b/docs/html/debug_8cuh.html new file mode 100644 index 0000000000..de86308386 --- /dev/null +++ b/docs/html/debug_8cuh.html @@ -0,0 +1,177 @@ + + + + + + + +CUB: debug.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
debug.cuh File Reference
+
+
+
#include <stdio.h>
+#include "ns_wrapper.cuh"
+#include "device_props.cuh"
+
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+ + + + + +

+Macros

#define CubDebug(f)   cub::Debug(f, __FILE__, __LINE__)
 
#define CubDebugExit(f)   if (cub::Debug(f, __FILE__, __LINE__)) exit(1)
 
+ + + + + + + +

+Functions

__host__ __device__
+__forceinline__ cudaError_t 
cub::Debug (cudaError_t error, const char *message, const char *filename, int line)
 If CUB_STDERR is defined and error is not cudaSuccess, message is printed to stderr along with the supplied source context. More...
 
__host__ __device__
+__forceinline__ cudaError_t 
cub::Debug (cudaError_t error, const char *filename, int line)
 If CUB_STDERR is defined and error is not cudaSuccess, the corresponding error message is printed to stderr along with the supplied source context. More...
 
+

Detailed Description

+

Debug error display routines

+

Macro Definition Documentation

+ +
+
+ + + + + + + + +
#define CubDebug( f)   cub::Debug(f, __FILE__, __LINE__)
+
+

Debug macro

+ +
+
+ +
+
+ + + + + + + + +
#define CubDebugExit( f)   if (cub::Debug(f, __FILE__, __LINE__)) exit(1)
+
+

Debug macro with exit

+ +
+
+
+ + + + + diff --git a/cub/docs/images/devfun_abstraction.png b/docs/html/devfun_abstraction.png similarity index 100% rename from cub/docs/images/devfun_abstraction.png rename to docs/html/devfun_abstraction.png diff --git a/docs/html/dir_011e1c944d88f71be72e1e24a5fda7cf.html b/docs/html/dir_011e1c944d88f71be72e1e24a5fda7cf.html new file mode 100644 index 0000000000..208be39297 --- /dev/null +++ b/docs/html/dir_011e1c944d88f71be72e1e24a5fda7cf.html @@ -0,0 +1,126 @@ + + + + + + + +CUB: block Directory Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+
+
block Directory Reference
+
+
+ + + + + + + + + + + + + + + + +

+Files

file  block_discontinuity.cuh
 
file  block_exchange.cuh
 
file  block_load.cuh
 
file  block_radix_sort.cuh
 
file  block_reduce.cuh
 
file  block_scan.cuh
 
file  block_store.cuh
 
+
+ + + + + diff --git a/docs/html/dir_bb50a5ef59f19d030d06415663184d05.html b/docs/html/dir_bb50a5ef59f19d030d06415663184d05.html new file mode 100644 index 0000000000..856cb7cfae --- /dev/null +++ b/docs/html/dir_bb50a5ef59f19d030d06415663184d05.html @@ -0,0 +1,116 @@ + + + + + + + +CUB: thread Directory Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+
+
thread Directory Reference
+
+
+ + + + + + +

+Files

file  thread_load.cuh
 
file  thread_store.cuh
 
+
+ + + + + diff --git a/docs/html/dir_cb3a671affffe7eeb3fdf5ae58e42cc8.html b/docs/html/dir_cb3a671affffe7eeb3fdf5ae58e42cc8.html new file mode 100644 index 0000000000..fdafbaa2e6 --- /dev/null +++ b/docs/html/dir_cb3a671affffe7eeb3fdf5ae58e42cc8.html @@ -0,0 +1,114 @@ + + + + + + + +CUB: warp Directory Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+
+
warp Directory Reference
+
+
+ + + + +

+Files

file  warp_scan.cuh
 
+
+ + + + + diff --git a/docs/html/dir_d583f216f1aafe19404e836b0c097ad2.html b/docs/html/dir_d583f216f1aafe19404e836b0c097ad2.html new file mode 100644 index 0000000000..47f004ecdc --- /dev/null +++ b/docs/html/dir_d583f216f1aafe19404e836b0c097ad2.html @@ -0,0 +1,127 @@ + + + + + + + +CUB: cub Directory Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+
+
cub Directory Reference
+
+
+ + + + + + + + +

+Directories

directory  block
 
directory  thread
 
directory  warp
 
+ + + + + + + +

+Files

file  debug.cuh
 
file  operators.cuh
 
file  type_utils.cuh
 
+
+ + + + + diff --git a/cub/docs/images/discont_logo.png b/docs/html/discont_logo.png similarity index 100% rename from cub/docs/images/discont_logo.png rename to docs/html/discont_logo.png diff --git a/cub/docs/images/download-icon.png b/docs/html/download-icon.png similarity index 100% rename from cub/docs/images/download-icon.png rename to docs/html/download-icon.png diff --git a/docs/html/download_cub.html b/docs/html/download_cub.html new file mode 100644 index 0000000000..5fe0be4517 --- /dev/null +++ b/docs/html/download_cub.html @@ -0,0 +1,51 @@ + + + + + + + + + + + + + + + + +
+If your download doesn't start in 3s: +

+ +Download CUB! +
+ + + \ No newline at end of file diff --git a/docs/html/doxygen.css b/docs/html/doxygen.css new file mode 100644 index 0000000000..dabaff2fd8 --- /dev/null +++ b/docs/html/doxygen.css @@ -0,0 +1,1184 @@ +/* The standard CSS for doxygen 1.8.3.1 */ + +body, table, div, p, dl { + font: 400 14px/19px Roboto,sans-serif; +} + +/* @group Heading Levels */ + +h1.groupheader { + font-size: 150%; +} + +.title { + font-size: 150%; + font-weight: bold; + margin: 10px 2px; +} + +h2.groupheader { + border-bottom: 1px solid #879ECB; + color: #354C7B; + font-size: 150%; + font-weight: normal; + margin-top: 1.75em; + padding-top: 8px; + padding-bottom: 4px; + width: 100%; +} + +h3.groupheader { + font-size: 100%; +} + +h1, h2, h3, h4, h5, h6 { + -webkit-transition: text-shadow 0.5s linear; + -moz-transition: text-shadow 0.5s linear; + -ms-transition: text-shadow 0.5s linear; + -o-transition: text-shadow 0.5s linear; + transition: text-shadow 0.5s linear; + margin-right: 15px; +} + +h1.glow, h2.glow, h3.glow, h4.glow, h5.glow, h6.glow { + text-shadow: 0 0 15px cyan; +} + +dt { + font-weight: bold; +} + +div.multicol { + -moz-column-gap: 1em; + -webkit-column-gap: 1em; + -moz-column-count: 3; + -webkit-column-count: 3; +} + +p.startli, p.startdd, p.starttd { + margin-top: 2px; +} + +p.endli { + margin-bottom: 0px; +} + +p.enddd { + margin-bottom: 4px; +} + +p.endtd { + margin-bottom: 2px; +} + +/* @end */ + +caption { + font-weight: bold; +} + +span.legend { + font-size: 70%; + text-align: center; +} + +h3.version { + font-size: 90%; + text-align: center; +} + +div.qindex, div.navtab{ + background-color: #EBEFF6; + border: 1px solid #A3B4D7; + text-align: center; +} + +div.qindex, div.navpath { + width: 100%; + line-height: 140%; +} + +div.navtab { + margin-right: 15px; +} + +/* @group Link Styling */ + +a { + color: #3D578C; + font-weight: normal; + text-decoration: none; +} + +.contents a:visited { + color: #4665A2; +} + +a:hover { + text-decoration: underline; +} + +a.qindex { + 
font-weight: bold; +} + +a.qindexHL { + font-weight: bold; + background-color: #9CAFD4; + color: #ffffff; + border: 1px double #869DCA; +} + +.contents a.qindexHL:visited { + color: #ffffff; +} + +a.el { + font-weight: bold; +} + +a.elRef { +} + +a.code, a.code:visited { + color: #4665A2; +} + +a.codeRef, a.codeRef:visited { + color: #4665A2; +} + +/* @end */ + +dl.el { + margin-left: -1cm; +} + +pre.fragment { + border: 1px solid #C4CFE5; + background-color: #FBFCFD; + padding: 4px 6px; + margin: 4px 8px 4px 2px; + overflow: auto; + word-wrap: break-word; + font-size: 9pt; + line-height: 125%; + font-family: monospace, fixed; + font-size: 105%; +} + +div.fragment { + padding: 4px; + margin: 4px; + background-color: #FBFCFD; + border: 1px solid #C4CFE5; +} + +div.line { + font-family: monospace, fixed; + font-size: 13px; + min-height: 13px; + line-height: 1.0; + text-wrap: unrestricted; + white-space: -moz-pre-wrap; /* Moz */ + white-space: -pre-wrap; /* Opera 4-6 */ + white-space: -o-pre-wrap; /* Opera 7 */ + white-space: pre-wrap; /* CSS3 */ + word-wrap: break-word; /* IE 5.5+ */ + text-indent: -53px; + padding-left: 53px; + padding-bottom: 0px; + margin: 0px; + -webkit-transition-property: background-color, box-shadow; + -webkit-transition-duration: 0.5s; + -moz-transition-property: background-color, box-shadow; + -moz-transition-duration: 0.5s; + -ms-transition-property: background-color, box-shadow; + -ms-transition-duration: 0.5s; + -o-transition-property: background-color, box-shadow; + -o-transition-duration: 0.5s; + transition-property: background-color, box-shadow; + transition-duration: 0.5s; +} + +div.line.glow { + background-color: cyan; + box-shadow: 0 0 10px cyan; +} + + +span.lineno { + padding-right: 4px; + text-align: right; + border-right: 2px solid #0F0; + background-color: #E8E8E8; + white-space: pre; +} +span.lineno a { + background-color: #D8D8D8; +} + +span.lineno a:hover { + background-color: #C8C8C8; +} + +div.ah { + background-color: 
black; + font-weight: bold; + color: #ffffff; + margin-bottom: 3px; + margin-top: 3px; + padding: 0.2em; + border: solid thin #333; + border-radius: 0.5em; + -webkit-border-radius: .5em; + -moz-border-radius: .5em; + box-shadow: 2px 2px 3px #999; + -webkit-box-shadow: 2px 2px 3px #999; + -moz-box-shadow: rgba(0, 0, 0, 0.15) 2px 2px 2px; + background-image: -webkit-gradient(linear, left top, left bottom, from(#eee), to(#000),color-stop(0.3, #444)); + background-image: -moz-linear-gradient(center top, #eee 0%, #444 40%, #000); +} + +div.groupHeader { + margin-left: 16px; + margin-top: 12px; + font-weight: bold; +} + +div.groupText { + margin-left: 16px; + font-style: italic; +} + +body { + background-color: white; + color: black; + margin: 0; +} + +div.contents { + margin-top: 10px; + margin-left: 12px; + margin-right: 8px; +} + +td.indexkey { + background-color: #EBEFF6; + font-weight: bold; + border: 1px solid #C4CFE5; + margin: 2px 0px 2px 0; + padding: 2px 10px; + white-space: nowrap; + vertical-align: top; +} + +td.indexvalue { + background-color: #EBEFF6; + border: 1px solid #C4CFE5; + padding: 2px 10px; + margin: 2px 0px; +} + +tr.memlist { + background-color: #EEF1F7; +} + +p.formulaDsp { + text-align: center; +} + +img.formulaDsp { + +} + +img.formulaInl { + vertical-align: middle; +} + +div.center { + text-align: center; + margin-top: 0px; + margin-bottom: 0px; + padding: 0px; +} + +div.center img { + border: 0px; +} + +address.footer { + text-align: right; + padding-right: 12px; +} + +img.footer { + border: 0px; + vertical-align: middle; +} + +/* @group Code Colorization */ + +span.keyword { + color: #008000 +} + +span.keywordtype { + color: #604020 +} + +span.keywordflow { + color: #e08000 +} + +span.comment { + color: #800000 +} + +span.preprocessor { + color: #806020 +} + +span.stringliteral { + color: #002080 +} + +span.charliteral { + color: #008080 +} + +span.vhdldigit { + color: #ff00ff +} + +span.vhdlchar { + color: #000000 +} + +span.vhdlkeyword { 
+ color: #700070 +} + +span.vhdllogic { + color: #ff0000 +} + +blockquote { + background-color: #F7F8FB; + border-left: 2px solid #9CAFD4; + margin: 0 24px 0 4px; + padding: 0 12px 0 16px; +} + +/* @end */ + +/* +.search { + color: #003399; + font-weight: bold; +} + +form.search { + margin-bottom: 0px; + margin-top: 0px; +} + +input.search { + font-size: 75%; + color: #000080; + font-weight: normal; + background-color: #e8eef2; +} +*/ + +td.tiny { + font-size: 75%; +} + +.dirtab { + padding: 4px; + border-collapse: collapse; + border: 1px solid #A3B4D7; +} + +th.dirtab { + background: #EBEFF6; + font-weight: bold; +} + +hr { + height: 0px; + border: none; + border-top: 1px solid #4A6AAA; +} + +hr.footer { + height: 1px; +} + +/* @group Member Descriptions */ + +table.memberdecls { + border-spacing: 0px; + padding: 0px; +} + +.memberdecls td, .fieldtable tr { + -webkit-transition-property: background-color, box-shadow; + -webkit-transition-duration: 0.5s; + -moz-transition-property: background-color, box-shadow; + -moz-transition-duration: 0.5s; + -ms-transition-property: background-color, box-shadow; + -ms-transition-duration: 0.5s; + -o-transition-property: background-color, box-shadow; + -o-transition-duration: 0.5s; + transition-property: background-color, box-shadow; + transition-duration: 0.5s; +} + +.memberdecls td.glow, .fieldtable tr.glow { + background-color: cyan; + box-shadow: 0 0 15px cyan; +} + +.mdescLeft, .mdescRight, +.memItemLeft, .memItemRight, +.memTemplItemLeft, .memTemplItemRight, .memTemplParams { + background-color: #F9FAFC; + border: none; + margin: 4px; + padding: 1px 0 0 8px; +} + +.mdescLeft, .mdescRight { + padding: 0px 8px 4px 8px; + color: #555; +} + +.memSeparator { + border-bottom: 1px solid #DEE4F0; + line-height: 1px; + margin: 0px; + padding: 0px; +} + +.memItemLeft, .memTemplItemLeft { + white-space: nowrap; +} + +.memItemRight { + width: 100%; +} + +.memTemplParams { + color: #4665A2; + white-space: nowrap; + font-size: 80%; +} 
+ +/* @end */ + +/* @group Member Details */ + +/* Styles for detailed member documentation */ + +.memtemplate { + font-size: 80%; + color: #4665A2; + font-weight: normal; + margin-left: 9px; +} + +.memnav { + background-color: #EBEFF6; + border: 1px solid #A3B4D7; + text-align: center; + margin: 2px; + margin-right: 15px; + padding: 2px; +} + +.mempage { + width: 100%; +} + +.memitem { + padding: 0; + margin-bottom: 10px; + margin-right: 5px; + -webkit-transition: box-shadow 0.5s linear; + -moz-transition: box-shadow 0.5s linear; + -ms-transition: box-shadow 0.5s linear; + -o-transition: box-shadow 0.5s linear; + transition: box-shadow 0.5s linear; + display: table !important; + width: 100%; +} + +.memitem.glow { + box-shadow: 0 0 15px cyan; +} + +.memname { + font-weight: bold; + margin-left: 6px; +} + +.memname td { + vertical-align: bottom; +} + +.memproto, dl.reflist dt { + border-top: 1px solid #A8B8D9; + border-left: 1px solid #A8B8D9; + border-right: 1px solid #A8B8D9; + padding: 6px 0px 6px 0px; + color: #253555; + font-weight: bold; + text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9); + background-image:url('nav_f.png'); + background-repeat:repeat-x; + background-color: #E2E8F2; + /* opera specific markup */ + box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + border-top-right-radius: 4px; + border-top-left-radius: 4px; + /* firefox specific markup */ + -moz-box-shadow: rgba(0, 0, 0, 0.15) 5px 5px 5px; + -moz-border-radius-topright: 4px; + -moz-border-radius-topleft: 4px; + /* webkit specific markup */ + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + -webkit-border-top-right-radius: 4px; + -webkit-border-top-left-radius: 4px; + +} + +.memdoc, dl.reflist dd { + border-bottom: 1px solid #A8B8D9; + border-left: 1px solid #A8B8D9; + border-right: 1px solid #A8B8D9; + padding: 6px 10px 2px 10px; + background-color: #FBFCFD; + border-top-width: 0; + background-image:url('nav_g.png'); + background-repeat:repeat-x; + background-color: #FFFFFF; + /* opera 
specific markup */ + border-bottom-left-radius: 4px; + border-bottom-right-radius: 4px; + box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); + /* firefox specific markup */ + -moz-border-radius-bottomleft: 4px; + -moz-border-radius-bottomright: 4px; + -moz-box-shadow: rgba(0, 0, 0, 0.15) 5px 5px 5px; + /* webkit specific markup */ + -webkit-border-bottom-left-radius: 4px; + -webkit-border-bottom-right-radius: 4px; + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); +} + +dl.reflist dt { + padding: 5px; +} + +dl.reflist dd { + margin: 0px 0px 10px 0px; + padding: 5px; +} + +.paramkey { + text-align: right; +} + +.paramtype { + white-space: nowrap; +} + +.paramname { + color: #602020; + white-space: nowrap; +} +.paramname em { + font-style: normal; +} +.paramname code { + line-height: 14px; +} + +.params, .retval, .exception, .tparams { + margin-left: 0px; + padding-left: 0px; +} + +.params .paramname, .retval .paramname { + font-weight: bold; + vertical-align: top; +} + +.params .paramtype { + font-style: italic; + vertical-align: top; +} + +.params .paramdir { + font-family: "courier new",courier,monospace; + vertical-align: top; +} + +table.mlabels { + border-spacing: 0px; +} + +td.mlabels-left { + width: 100%; + padding: 0px; +} + +td.mlabels-right { + vertical-align: bottom; + padding: 0px; + white-space: nowrap; +} + +span.mlabels { + margin-left: 8px; +} + +span.mlabel { + background-color: #728DC1; + border-top:1px solid #5373B4; + border-left:1px solid #5373B4; + border-right:1px solid #C4CFE5; + border-bottom:1px solid #C4CFE5; + text-shadow: none; + color: white; + margin-right: 4px; + padding: 2px 3px; + border-radius: 3px; + font-size: 7pt; + white-space: nowrap; + vertical-align: middle; +} + + + +/* @end */ + +/* these are for tree view when not used as main index */ + +div.directory { + margin: 10px 0px; + border-top: 1px solid #A8B8D9; + border-bottom: 1px solid #A8B8D9; + width: 100%; +} + +.directory table { + border-collapse:collapse; +} + 
+.directory td { + margin: 0px; + padding: 0px; + vertical-align: top; +} + +.directory td.entry { + white-space: nowrap; + padding-right: 6px; +} + +.directory td.entry a { + outline:none; +} + +.directory td.entry a img { + border: none; +} + +.directory td.desc { + width: 100%; + padding-left: 6px; + padding-right: 6px; + padding-top: 3px; + border-left: 1px solid rgba(0,0,0,0.05); +} + +.directory tr.even { + padding-left: 6px; + background-color: #F7F8FB; +} + +.directory img { + vertical-align: -30%; +} + +.directory .levels { + white-space: nowrap; + width: 100%; + text-align: right; + font-size: 9pt; +} + +.directory .levels span { + cursor: pointer; + padding-left: 2px; + padding-right: 2px; + color: #3D578C; +} + +div.dynheader { + margin-top: 8px; + -webkit-touch-callout: none; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; +} + +address { + font-style: normal; + color: #2A3D61; +} + +table.doxtable { + border-collapse:collapse; + margin-top: 4px; + margin-bottom: 4px; +} + +table.doxtable td, table.doxtable th { + border: 1px solid #2D4068; + padding: 3px 7px 2px; +} + +table.doxtable th { + background-color: #374F7F; + color: #FFFFFF; + font-size: 110%; + padding-bottom: 4px; + padding-top: 5px; +} + +table.fieldtable { + /*width: 100%;*/ + margin-bottom: 10px; + border: 1px solid #A8B8D9; + border-spacing: 0px; + -moz-border-radius: 4px; + -webkit-border-radius: 4px; + border-radius: 4px; + -moz-box-shadow: rgba(0, 0, 0, 0.15) 2px 2px 2px; + -webkit-box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.15); + box-shadow: 2px 2px 2px rgba(0, 0, 0, 0.15); +} + +.fieldtable td, .fieldtable th { + padding: 3px 7px 2px; +} + +.fieldtable td.fieldtype, .fieldtable td.fieldname { + white-space: nowrap; + border-right: 1px solid #A8B8D9; + border-bottom: 1px solid #A8B8D9; + vertical-align: top; +} + +.fieldtable td.fieldname { + padding-top: 5px; +} + +.fieldtable td.fielddoc { + 
border-bottom: 1px solid #A8B8D9; + /*width: 100%;*/ +} + +.fieldtable td.fielddoc p:first-child { + margin-top: 2px; +} + +.fieldtable td.fielddoc p:last-child { + margin-bottom: 2px; +} + +.fieldtable tr:last-child td { + border-bottom: none; +} + +.fieldtable th { + background-image:url('nav_f.png'); + background-repeat:repeat-x; + background-color: #E2E8F2; + font-size: 90%; + color: #253555; + padding-bottom: 4px; + padding-top: 5px; + text-align:left; + -moz-border-radius-topleft: 4px; + -moz-border-radius-topright: 4px; + -webkit-border-top-left-radius: 4px; + -webkit-border-top-right-radius: 4px; + border-top-left-radius: 4px; + border-top-right-radius: 4px; + border-bottom: 1px solid #A8B8D9; +} + + +.tabsearch { + top: 0px; + left: 10px; + height: 36px; + background-image: url('tab_b.png'); + z-index: 101; + overflow: hidden; + font-size: 13px; +} + +.navpath ul +{ + font-size: 11px; + background-image:url('tab_b.png'); + background-repeat:repeat-x; + background-position: 0 -5px; + height:30px; + line-height:30px; + color:#8AA0CC; + border:solid 1px #C2CDE4; + overflow:hidden; + margin:0px; + padding:0px; +} + +.navpath li +{ + list-style-type:none; + float:left; + padding-left:10px; + padding-right:15px; + background-image:url('bc_s.png'); + background-repeat:no-repeat; + background-position:right; + color:#364D7C; +} + +.navpath li.navelem a +{ + height:32px; + display:block; + text-decoration: none; + outline: none; + color: #283A5D; + font-family: 'Lucida Grande',Geneva,Helvetica,Arial,sans-serif; + text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9); + text-decoration: none; +} + +.navpath li.navelem a:hover +{ + color:#6884BD; +} + +.navpath li.footer +{ + list-style-type:none; + float:right; + padding-left:10px; + padding-right:15px; + background-image:none; + background-repeat:no-repeat; + background-position:right; + color:#364D7C; + font-size: 8pt; +} + + +div.summary +{ + float: right; + font-size: 8pt; + padding-right: 5px; + width: 50%; + 
text-align: right; +} + +div.summary a +{ + white-space: nowrap; +} + +div.ingroups +{ + font-size: 8pt; + width: 50%; + text-align: left; +} + +div.ingroups a +{ + white-space: nowrap; +} + +div.header +{ + background-image:url('nav_h.png'); + background-repeat:repeat-x; + background-color: #F9FAFC; + margin: 0px; + border-bottom: 1px solid #C4CFE5; +} + +div.headertitle +{ + padding: 5px 5px 5px 10px; +} + +dl +{ + padding: 0 0 0 10px; +} + +/* dl.note, dl.warning, dl.attention, dl.pre, dl.post, dl.invariant, dl.deprecated, dl.todo, dl.test, dl.bug */ +dl.section +{ + margin-left: 0px; + padding-left: 0px; +} + +dl.note +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #D0C000; +} + +dl.warning, dl.attention +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #FF0000; +} + +dl.pre, dl.post, dl.invariant +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #00D000; +} + +dl.deprecated +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #505050; +} + +dl.todo +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #00C0E0; +} + +dl.test +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #3030E0; +} + +dl.bug +{ + margin-left:-7px; + padding-left: 3px; + border-left:4px solid; + border-color: #C08050; +} + +dl.section dd { + margin-bottom: 6px; +} + + +#projectlogo +{ + text-align: center; + vertical-align: bottom; + border-collapse: separate; +} + +#projectlogo img +{ + border: 0px none; +} + +#projectname +{ + font: 300% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 2px 0px; +} + +#projectbrief +{ + font: 120% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 0px; +} + +#projectnumber +{ + font: 50% Tahoma, Arial,sans-serif; + margin: 0px; + padding: 0px; +} + +#titlearea +{ + padding: 0px; + margin: 0px; + width: 100%; + border-bottom: 1px solid #5373B4; +} + 
+.image +{ + text-align: center; +} + +.dotgraph +{ + text-align: center; +} + +.mscgraph +{ + text-align: center; +} + +.caption +{ + font-weight: bold; +} + +div.zoom +{ + border: 1px solid #90A5CE; +} + +dl.citelist { + margin-bottom:50px; +} + +dl.citelist dt { + color:#334975; + float:left; + font-weight:bold; + margin-right:10px; + padding:5px; +} + +dl.citelist dd { + margin:2px 0; + padding:5px 0; +} + +div.toc { + padding: 14px 25px; + background-color: #F4F6FA; + border: 1px solid #D8DFEE; + border-radius: 7px 7px 7px 7px; + float: right; + height: auto; + margin: 0 20px 10px 10px; + width: 200px; +} + +div.toc li { + background: url("bdwn.png") no-repeat scroll 0 5px transparent; + font: 10px/1.2 Verdana,DejaVu Sans,Geneva,sans-serif; + margin-top: 5px; + padding-left: 10px; + padding-top: 2px; +} + +div.toc h3 { + font: bold 12px/1.2 Arial,FreeSans,sans-serif; + color: #4665A2; + border-bottom: 0 none; + margin: 0; +} + +div.toc ul { + list-style: none outside none; + border: medium none; + padding: 0px; +} + +div.toc li.level1 { + margin-left: 0px; +} + +div.toc li.level2 { + margin-left: 15px; +} + +div.toc li.level3 { + margin-left: 30px; +} + +div.toc li.level4 { + margin-left: 45px; +} + +.inherit_header { + font-weight: bold; + color: gray; + cursor: pointer; + -webkit-touch-callout: none; + -webkit-user-select: none; + -khtml-user-select: none; + -moz-user-select: none; + -ms-user-select: none; + user-select: none; +} + +.inherit_header td { + padding: 6px 0px 2px 5px; +} + +.inherit { + display: none; +} + +tr.heading h2 { + margin-top: 12px; + margin-bottom: 4px; +} + +@media print +{ + #top { display: none; } + #side-nav { display: none; } + #nav-path { display: none; } + body { overflow:visible; } + h1, h2, h3, h4, h5, h6 { page-break-after: avoid; } + .summary { display: none; } + .memitem { page-break-inside: avoid; } + #doc-content + { + margin-left:0 !important; + height:auto !important; + width:auto !important; + overflow:inherit; + 
display:inline; + } +} + diff --git a/docs/html/doxygen.png b/docs/html/doxygen.png new file mode 100644 index 0000000000..3ff17d807f Binary files /dev/null and b/docs/html/doxygen.png differ diff --git a/docs/html/dynsections.js b/docs/html/dynsections.js new file mode 100644 index 0000000000..ed092c7f63 --- /dev/null +++ b/docs/html/dynsections.js @@ -0,0 +1,97 @@ +function toggleVisibility(linkObj) +{ + var base = $(linkObj).attr('id'); + var summary = $('#'+base+'-summary'); + var content = $('#'+base+'-content'); + var trigger = $('#'+base+'-trigger'); + var src=$(trigger).attr('src'); + if (content.is(':visible')===true) { + content.hide(); + summary.show(); + $(linkObj).addClass('closed').removeClass('opened'); + $(trigger).attr('src',src.substring(0,src.length-8)+'closed.png'); + } else { + content.show(); + summary.hide(); + $(linkObj).removeClass('closed').addClass('opened'); + $(trigger).attr('src',src.substring(0,src.length-10)+'open.png'); + } + return false; +} + +function updateStripes() +{ + $('table.directory tr'). + removeClass('even').filter(':visible:even').addClass('even'); +} +function toggleLevel(level) +{ + $('table.directory tr').each(function(){ + var l = this.id.split('_').length-1; + var i = $('#img'+this.id.substring(3)); + var a = $('#arr'+this.id.substring(3)); + if (l + + + + + + +CUB: Class Members + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + +
+ + + + +
+ +
+ +
+
Here is a list of all documented class members with links to the class documentation for each member:
+ +

- b -

+ + +

- c -

+ + +

- e -

+ + +

- f -

+ + +

- i -

+ + +

- l -

+ + +

- o -

+ + +

- r -

+ + +

- s -

+ + +

- t -

+ + +

- v -

+
+ + + + + diff --git a/docs/html/functions_func.html b/docs/html/functions_func.html new file mode 100644 index 0000000000..c13f0d5f36 --- /dev/null +++ b/docs/html/functions_func.html @@ -0,0 +1,231 @@ + + + + + + + +CUB: Class Members - Functions + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + +
+ + + + +
+ +
+ +
+  + +

- b -

+ + +

- e -

+ + +

- f -

+ + +

- i -

+ + +

- l -

+ + +

- o -

+ + +

- r -

+ + +

- s -

+
+ + + + + diff --git a/docs/html/functions_type.html b/docs/html/functions_type.html new file mode 100644 index 0000000000..3b7a665c1d --- /dev/null +++ b/docs/html/functions_type.html @@ -0,0 +1,133 @@ + + + + + + + +CUB: Class Members - Typedefs + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + +
+ + + + +
+ +
+ + + + + + + diff --git a/docs/html/functions_vars.html b/docs/html/functions_vars.html new file mode 100644 index 0000000000..22fc0f82cb --- /dev/null +++ b/docs/html/functions_vars.html @@ -0,0 +1,124 @@ + + + + + + + +CUB: Class Members - Variables + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + +
+ + + + +
+ +
+ + + + + + + diff --git a/cub/docs/images/github-icon-747d8b799a48162434b2c0595ba1317e.png b/docs/html/github-icon-747d8b799a48162434b2c0595ba1317e.png similarity index 100% rename from cub/docs/images/github-icon-747d8b799a48162434b2c0595ba1317e.png rename to docs/html/github-icon-747d8b799a48162434b2c0595ba1317e.png diff --git a/docs/html/group___host_util.html b/docs/html/group___host_util.html new file mode 100644 index 0000000000..94588d9949 --- /dev/null +++ b/docs/html/group___host_util.html @@ -0,0 +1,195 @@ + + + + + + + +CUB: Host Utilities + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + +
+ + + + +
+ +
+ +
+ +
+
Host Utilities
+
+
+ + + + + + + + +

+Functions

__host__ __device__
+__forceinline__ cudaError_t 
cub::Debug (cudaError_t error, const char *message, const char *filename, int line)
 If CUB_STDERR is defined and error is not cudaSuccess, message is printed to stderr along with the supplied source context. More...
 
__host__ __device__
+__forceinline__ cudaError_t 
cub::Debug (cudaError_t error, const char *filename, int line)
 If CUB_STDERR is defined and error is not cudaSuccess, the corresponding error message is printed to stderr along with the supplied source context. More...
 
+

Function Documentation

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
__host__ __device__ __forceinline__ cudaError_t cub::Debug (cudaError_t error,
const char * message,
const char * filename,
int line 
)
+
+ +

If CUB_STDERR is defined and error is not cudaSuccess, message is printed to stderr along with the supplied source context.

+
Returns
The CUDA error.
+ +
+
+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
__host__ __device__ __forceinline__ cudaError_t cub::Debug (cudaError_t error,
const char * filename,
int line 
)
+
+ +

If CUB_STDERR is defined and error is not cudaSuccess, the corresponding error message is printed to stderr along with the supplied source context.

+
Returns
The CUDA error.
+ +
+
+
+ + + + + diff --git a/docs/html/group___simt.html b/docs/html/group___simt.html new file mode 100644 index 0000000000..bff7bac5d7 --- /dev/null +++ b/docs/html/group___simt.html @@ -0,0 +1,114 @@ + + + + + + + +CUB: SIMT Primitives + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + +
+ + + + +
+ +
+ +
+ +
+
SIMT Primitives
+
+
+ + + + + + +

+Modules

 Cooperative SIMT Operations
 
 SIMT Utilities
 
+
+ + + + + diff --git a/docs/html/group___simt_coop.html b/docs/html/group___simt_coop.html new file mode 100644 index 0000000000..c26e678565 --- /dev/null +++ b/docs/html/group___simt_coop.html @@ -0,0 +1,174 @@ + + + + + + + +CUB: Cooperative SIMT Operations + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + +
+ + + + +
+ +
+ +
+ +
+
Cooperative SIMT Operations
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

class  cub::BlockDiscontinuity< T, BLOCK_THREADS >
 BlockDiscontinuity provides operations for flagging discontinuities within a list of data items partitioned across a CUDA threadblock.

+
+discont_logo.png +
+.
+ More...
 
class  cub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >
 BlockExchange provides operations for reorganizing the partitioning of ordered data across a CUDA threadblock.

+
+transpose_logo.png +
+.
+ More...
 
class  cub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >
 BlockLoad provides data movement operations for reading block-arranged data from global memory.

+
+block_load_logo.png +
+.
+ More...
 
class  cub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >
 BlockRadixSort provides variants of parallel radix sorting across a CUDA threadblock.

+
+sorting_logo.png +
+.
+ More...
 
class  cub::BlockReduce< T, BLOCK_THREADS >
 BlockReduce provides variants of parallel reduction across a CUDA threadblock.

+
+reduce_logo.png +
+.
+ More...
 
class  cub::BlockScan< T, BLOCK_THREADS, POLICY >
 BlockScan provides variants of parallel prefix scan (and prefix sum) across a CUDA threadblock.

+
+scan_logo.png +
+.
+ More...
 
class  cub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >
 BlockStore provides data movement operations for writing blocked-arranged data to global memory.

+
+block_store_logo.png +
+.
+ More...
 
class  cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >
 WarpScan provides variants of parallel prefix scan across a CUDA warp.

+
+warp_scan_logo.png +
+.
+ More...
 
+
+ + + + + diff --git a/docs/html/group___simt_utils.html.REMOVED.git-id b/docs/html/group___simt_utils.html.REMOVED.git-id new file mode 100644 index 0000000000..a0d067bf82 --- /dev/null +++ b/docs/html/group___simt_utils.html.REMOVED.git-id @@ -0,0 +1 @@ +4f12ad3881281ec2783eca35eaee8cc6a3c7413e \ No newline at end of file diff --git a/cub/docs/images/groups-icon.png b/docs/html/groups-icon.png similarity index 100% rename from cub/docs/images/groups-icon.png rename to docs/html/groups-icon.png diff --git a/docs/html/hierarchy.html b/docs/html/hierarchy.html new file mode 100644 index 0000000000..c4f3a5d277 --- /dev/null +++ b/docs/html/hierarchy.html @@ -0,0 +1,173 @@ + + + + + + + +CUB: Class Hierarchy + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + +
+ + + + +
+ +
+ +
+
+
Class Hierarchy
+
+
+
This inheritance list is sorted roughly, but not completely, alphabetically:
+
[detail level 123]
+ + + + + + + + + + + + + + + + + + + + + + + + + +
oCcub::ArrayTraits< ArrayType, LENGTH >Array traits
oCcub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits >Basic type traits
oCcub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type >
|\Ccub::NumericTraits< RemoveQualifiers< T >::Type >
| \Ccub::Traits< T >Type traits
oCcub::BaseTraits< NOT_A_NUMBER, false, false, T >
|\Ccub::NumericTraits< T >Numeric type traits
oCcub::BlockDiscontinuity< T, BLOCK_THREADS >BlockDiscontinuity provides operations for flagging discontinuities within a list of data items partitioned across a CUDA threadblock.

+
+discont_logo.png +
+
oCcub::BlockExchange< T, BLOCK_THREADS, ITEMS_PER_THREAD >BlockExchange provides operations for reorganizing the partitioning of ordered data across a CUDA threadblock.

+
+transpose_logo.png +
+
oCcub::BlockLoad< InputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >BlockLoad provides data movement operations for reading block-arranged data from global memory.

+
+block_load_logo.png +
+
oCcub::BlockRadixSort< KeyType, BLOCK_THREADS, ITEMS_PER_THREAD, ValueType, RADIX_BITS, SMEM_CONFIG >BlockRadixSort provides variants of parallel radix sorting across a CUDA threadblock.

+
+sorting_logo.png +
+
oCcub::BlockReduce< T, BLOCK_THREADS >BlockReduce provides variants of parallel reduction across a CUDA threadblock.

+
+reduce_logo.png +
+
oCcub::BlockScan< T, BLOCK_THREADS, POLICY >BlockScan provides variants of parallel prefix scan (and prefix sum) across a CUDA threadblock.

+
+scan_logo.png +
+
oCcub::BlockStore< OutputIterator, BLOCK_THREADS, ITEMS_PER_THREAD, POLICY, MODIFIER >BlockStore provides data movement operations for writing blocked-arranged data to global memory.

+
+block_store_logo.png +
+
oCcub::EnableIf< Condition, T >Simple enable-if (similar to Boost)
oCcub::Equality< T >Default equality functor
oCcub::Equals< A, B >Type equality test
oCcub::If< IF, ThenType, ElseType >Type selection (IF ? ThenType : ElseType)
oCcub::IsVolatile< Tp >Volatile modifier test
oCcub::Log2< N, CURRENT_VAL, COUNT >Statically determine log2(N), rounded up
oCcub::Max< T >Default max functor
oCcub::NullTypeA simple "NULL" marker type
oCcub::RemoveQualifiers< Tp, Up >Removes const and volatile qualifiers from type Tp
oCcub::Sum< T >Default sum functor
\Ccub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >WarpScan provides variants of parallel prefix scan across a CUDA warp.

+
+warp_scan_logo.png +
+
+
+
+ + + + + diff --git a/docs/html/index.html b/docs/html/index.html new file mode 100644 index 0000000000..67e3ff65b9 --- /dev/null +++ b/docs/html/index.html @@ -0,0 +1,358 @@ + + + + + + + +CUB: Main Page + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + +
+ + + + +
+ +
+ +
+
+
CUB Documentation
+
+
+ +
+ +   +Download CUB! +
+ +   +Browse or fork CUB at GitHub! +
+ +   +Join the cub-users discussion forum! +

+(1) What is CUB?

+
CUB is a library of high-performance parallel primitives and other utilities for constructing CUDA kernel software. CUB enhances productivity, performance, and portability by providing an abstraction layer over complex block-level, warp-level, and thread-level operations.
+
CUB's primitives are not bound to any particular width of parallelism or to any particular data type. This allows them to be flexible and tunable to fit your kernels' needs. Thus CUB is CUDA Unbound.
+
+cub_overview.png +
+
Browse our collections of:
    +
  • Cooperative primitives, including:
      +
    • Thread block operations (e.g., radix sort, prefix scan, reduction, etc.)
    • +
    • Warp operations (e.g., prefix scan)
    • +
    +
  • +
  • SIMT utilities, including:
      +
    • Tile-based I/O utilities (e.g., for performing {vectorized, coalesced} data movement of {blocked, striped} data tiles)
    • +
    • Low-level thread I/O using cache-modifiers
    • +
    • Abstractions for thread block work distribution (e.g., work-stealing, even-share, etc.)
    • +
    +
  • +
  • Host utilities, including:
      +
    • Caching allocator for quick management of device temporaries
    • +
    • Device reflection
    • +
    +
  • +
+
+

+(2) Recent news

+
    +
  • CUB v0.9.1 (03/09/2013). Intial "preview" release. CUB is the first durable, high-performance library of cooperative block-level, warp-level, and thread-level primitives for CUDA kernel programming. More primitives and examples coming soon!
  • +
+
+

+(3) A simple example

+
The following code snippet illustrates a simple CUDA kernel for sorting a thread block's data:
+
#include <cub.cuh>
+
+
// An tile-sorting CUDA kernel
+
template <
+
int BLOCK_THREADS, // Threads per block
+
int ITEMS_PER_THREAD, // Items per thread
+
typename T> // Numeric data type
+
__global__ void TileSortKernel(T *d_in, T *d_out)
+
{
+
using namespace cub;
+
const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD;
+
+
// Parameterize cub::BlockRadixSort for the parallel execution context
+
typedef BlockRadixSort<T, BLOCK_THREADS> BlockRadixSort;
+
+
// Declare the shared memory needed by BlockRadixSort
+
__shared__ typename BlockRadixSort::SmemStorage smem_storage;
+
+
// A segment of data items per thread
+
T data[ITEMS_PER_THREAD];
+
+
// Load a tile of data using vector-load instructions
+
BlockLoadVectorized(data, d_in + (blockIdx.x * TILE_SIZE));
+
+
// Sort data in ascending order
+
BlockRadixSort::SortBlocked(smem_storage, data);
+
+
// Store the sorted tile using vector-store instructions
+
BlockStoreVectorized(data, d_out + (blockIdx.x * TILE_SIZE));
+
}
+
+
The cub::BlockRadixSort type performs a cooperative radix sort across the thread block's data items. Its implementation is parameterized by the number of threads per block and the aggregate data type T and is specialized for the underlying architecture.
+
Once instantiated, the cub::BlockRadixSort type exposes an opaque cub::BlockRadixSort::SmemStorage member type. The thread block uses this storage type to allocate the shared memory needed by the primitive. This storage type can be aliased or union'd with other types so that the shared memory can be reused for other purposes.
+
Furthermore, the kernel uses CUB's primitives for vectorizing global loads and stores. For example, lower-level ld.global.v4.s32 PTX instructions will be generated when T = int and ITEMS_PER_THREAD is a multiple of 4.
+

+(4) Why do you need CUB?

+
CUDA kernel software is where the complexity of parallelism is expressed. Programmers must reason about deadlock, livelock, synchronization, race conditions, shared memory layout, plurality of state, granularity, throughput, latency, memory bottlenecks, etc. Constructing and fine-tuning kernel code is perhaps the most challenging, time-consuming aspect of CUDA programming.
+
However, with the exception of CUB, there are few (if any) software libraries of reusable kernel primitives. In the CUDA ecosystem, CUB is unique in this regard. As a SIMT library and software abstraction layer, CUB provides:
    +
  1. Simplicity of composition. Parallel CUB primitives can be simply sequenced together in kernel code. (This convenience is analogous to programming with Thrust primitives in the host program.)
  2. +
  3. High performance. CUB simplifies high performance kernel development by taking care to implement and make available the fastest available algorithms, strategies, and techniques.
  4. +
  5. Performance portability. CUB primitives are specialized to match the target hardware. Furthermore, the CUB library continually evolves to accommodate new algorithmic developments, hardware instructions, etc.
  6. +
  7. Simplicity of performance tuning. CUB primitives provide parallel abstractions whose performance behavior can be statically tuned. For example, most CUB primitives support alternative algorithmic strategies and variable grain sizes (threads per block, items per thread, etc.).
  8. +
  9. Robustness and durability. CUB primitives are designed to function properly for arbitrary data types and widths of parallelism (not just for the built-in C++ types or for powers-of-two threads per block).
  10. +
+
+

+(5) Where is CUB positioned in the CUDA ecosystem?

+
CUDA's programming model embodies three different levels of program execution, each engendering its own abstraction layer in the CUDA software stack (i.e., the "black boxes" below):
+ + + + + + + +
CUDA kernel. A single CPU thread invokes a CUDA kernel to perform some data-parallel function. The incorporation of entire kernels (and their corresponding invocation stubs) into libraries is the most common form of code reuse for CUDA. Libraries of CUDA kernels include the following: +
+
+ +
Thread blocks (SIMT). Each kernel invocation comprises some number of parallel threads. Threads are grouped into blocks, and the entire block of threads invokes some cooperative function in which they communicate and synchronize with each other. There has historically been very little reuse of cooperative SIMT software within CUDA kernel. Libraries of thread-block primitives include the following: +
+
+ +
CUDA thread. A single CUDA thread invokes some sequential function. This is the finest-grained level of CUDA software abstraction and requires no consideration for the scheduling or synchronization of parallel threads. CUDA libraries of purely data-parallel functions include the following: +
+
+ +
+

+(6) How does CUB work?

+
CUB leverages the following programming idioms:
    +
  1. C++ templates
  2. +
  3. Reflective type structure
  4. +
  5. Flexible data mapping
  6. +
+
+

+6.1    C++ templates

+
As a SIMT library, CUB must be flexible enough to accommodate a wide spectrum of parallel execution contexts, i.e., specific:
    +
  • Data types
  • +
  • Widths of parallelism (threads per block)
  • +
  • Grain sizes (data items per thread)
  • +
  • Underlying architectures (special instructions, warp size, rules for bank conflicts, etc.)
  • +
  • Tuning requirements (e.g., latency vs. throughput)
  • +
+
+
To provide this flexibility, CUB is implemented as a C++ template library. C++ templates are a way to write generic algorithms and data structures. There is no need to build CUB separately. You simply #include the cub.cuh header file into your .cu CUDA C++ sources and compile with NVIDIA's nvcc compiler.
+

+6.2    Reflective type structure

+
Cooperation within a thread block requires shared memory for communicating between threads. However, the specific size and layout of the memory needed by a given primitive will be specific to the details of its parallel execution context (e.g., how many threads are calling into it, how many items are processed per thread, etc.). Furthermore, this shared memory must be allocated outside of the component itself if it is to be reused elsewhere by the thread block.
+
// Parameterize a BlockScan type for use with 128 threads
+
// and 4 items per thread
+ +
+
// Declare shared memory for BlockScan
+
__shared__ typename BlockScan::SmemStorage smem_storage;
+
+
// A segment of consecutive input items per thread
+
int data[4];
+
+
// Obtain data in blocked order
+
...
+
+
// Perform an exclusive prefix sum across the tile of data
+
BlockScan::ExclusiveSum(smem_storage, data, data);
+
+
To address this issue, we encapsulate cooperative procedures within reflective type structure (C++ classes). As illustrated in the cub::BlockScan example above, these primitives are C++ classes with interfaces that expose both:
    +
  • Procedural entrypoints for a block of threads to invoke
  • +
  • An opaque shared memory type needed for the operation of those methods
  • +
+
+

+6.3    Flexible data mapping

+
We often design kernels such that each thread block is assigned a "tile" of data items for processing.
+
+tile.png +
+
Tile of eight ordered data items
+
When the tile size equals the thread block size, the mapping of data onto threads is straightforward (one datum per thread). However, there are often performance advantages for processing more than one datum per thread. For these scenarios, CUB primitives support the following alternatives for partitioning data items across the block of threads:
+ + + + + +
    +
  • Blocked arrangement. The aggregate tile of items is partitioned evenly across threads in "blocked" fashion with threadi owning the ith segment of consecutive elements. Blocked arrangements are often desirable for algorithmic benefits (where long sequences of items can be processed sequentially within each thread).
  • +
+
+
+blocked.png +
+
Blocked arrangement across four threads
+(emphasis on items owned by thread0)
+
    +
  • Striped arrangement. The aggregate tile of items is partitioned across threads in "striped" fashion, i.e., the ITEMS_PER_THREAD items owned by each thread have logical stride BLOCK_THREADS between them. Striped arrangements are often desirable for data movement through global memory (where read/write coalescing is an important performance consideration).
  • +
+
+
+striped.png +
+
Striped arrangement across four threads
+(emphasis on items owned by thread0)
+
+
The benefits of processing multiple items per thread (a.k.a., register blocking, granularity coarsening, etc.) include:
    +
  • Algorithmic efficiency. Sequential work over multiple items in thread-private registers is cheaper than synchronized, cooperative work through shared memory spaces.
  • +
  • Data occupancy. The number of items that can be resident on-chip in thread-private register storage is often greater than the number of schedulable threads.
  • +
  • Instruction-level parallelism. Multiple items per thread also facilitates greater ILP for improved throughput and utilization.
  • +
+
+
Finally, cub::BlockExchange provides operations for converting between blocked and striped arrangements.
+

+(7) Contributors

+
CUB is developed as an open-source project by NVIDIA Research. The primary contributor is Duane Merrill.
+

+(8) Open Source License

+
CUB is available under the "New BSD" open-source license:
+
Copyright (c) 2011, Duane Merrill. All rights reserved.
+
Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved.
+
+
Redistribution and use in source and binary forms, with or without
+
modification, are permitted provided that the following conditions are met:
+
* Redistributions of source code must retain the above copyright
+
notice, this list of conditions and the following disclaimer.
+
* Redistributions in binary form must reproduce the above copyright
+
notice, this list of conditions and the following disclaimer in the
+
documentation and/or other materials provided with the distribution.
+
* Neither the name of the NVIDIA CORPORATION nor the
+
names of its contributors may be used to endorse or promote products
+
derived from this software without specific prior written permission.
+
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+
DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+ + + + + diff --git a/docs/html/jquery.js b/docs/html/jquery.js new file mode 100644 index 0000000000..63939e76dd --- /dev/null +++ b/docs/html/jquery.js @@ -0,0 +1,8 @@ +/*! jQuery v1.7.1 jquery.com | jquery.org/license */ +(function(a,b){function cy(a){return f.isWindow(a)?a:a.nodeType===9?a.defaultView||a.parentWindow:!1}function cv(a){if(!ck[a]){var b=c.body,d=f("<"+a+">").appendTo(b),e=d.css("display");d.remove();if(e==="none"||e===""){cl||(cl=c.createElement("iframe"),cl.frameBorder=cl.width=cl.height=0),b.appendChild(cl);if(!cm||!cl.createElement)cm=(cl.contentWindow||cl.contentDocument).document,cm.write((c.compatMode==="CSS1Compat"?"":"")+""),cm.close();d=cm.createElement(a),cm.body.appendChild(d),e=f.css(d,"display"),b.removeChild(cl)}ck[a]=e}return ck[a]}function cu(a,b){var c={};f.each(cq.concat.apply([],cq.slice(0,b)),function(){c[this]=a});return c}function ct(){cr=b}function cs(){setTimeout(ct,0);return cr=f.now()}function cj(){try{return new a.ActiveXObject("Microsoft.XMLHTTP")}catch(b){}}function ci(){try{return new a.XMLHttpRequest}catch(b){}}function cc(a,c){a.dataFilter&&(c=a.dataFilter(c,a.dataType));var d=a.dataTypes,e={},g,h,i=d.length,j,k=d[0],l,m,n,o,p;for(g=1;g0){if(c!=="border")for(;g=0===c})}function S(a){return!a||!a.parentNode||a.parentNode.nodeType===11}function K(){return!0}function J(){return!1}function n(a,b,c){var d=b+"defer",e=b+"queue",g=b+"mark",h=f._data(a,d);h&&(c==="queue"||!f._data(a,e))&&(c==="mark"||!f._data(a,g))&&setTimeout(function(){!f._data(a,e)&&!f._data(a,g)&&(f.removeData(a,d,!0),h.fire())},0)}function m(a){for(var b in a){if(b==="data"&&f.isEmptyObject(a[b]))continue;if(b!=="toJSON")return!1}return!0}function l(a,c,d){if(d===b&&a.nodeType===1){var e="data-"+c.replace(k,"-$1").toLowerCase();d=a.getAttribute(e);if(typeof d=="string"){try{d=d==="true"?!0:d==="false"?!1:d==="null"?null:f.isNumeric(d)?parseFloat(d):j.test(d)?f.parseJSON(d):d}catch(g){}f.data(a,c,d)}else d=b}return d}function h(a){var 
b=g[a]={},c,d;a=a.split(/\s+/);for(c=0,d=a.length;c)[^>]*$|#([\w\-]*)$)/,j=/\S/,k=/^\s+/,l=/\s+$/,m=/^<(\w+)\s*\/?>(?:<\/\1>)?$/,n=/^[\],:{}\s]*$/,o=/\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g,p=/"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g,q=/(?:^|:|,)(?:\s*\[)+/g,r=/(webkit)[ \/]([\w.]+)/,s=/(opera)(?:.*version)?[ \/]([\w.]+)/,t=/(msie) ([\w.]+)/,u=/(mozilla)(?:.*? rv:([\w.]+))?/,v=/-([a-z]|[0-9])/ig,w=/^-ms-/,x=function(a,b){return(b+"").toUpperCase()},y=d.userAgent,z,A,B,C=Object.prototype.toString,D=Object.prototype.hasOwnProperty,E=Array.prototype.push,F=Array.prototype.slice,G=String.prototype.trim,H=Array.prototype.indexOf,I={};e.fn=e.prototype={constructor:e,init:function(a,d,f){var g,h,j,k;if(!a)return this;if(a.nodeType){this.context=this[0]=a,this.length=1;return this}if(a==="body"&&!d&&c.body){this.context=c,this[0]=c.body,this.selector=a,this.length=1;return this}if(typeof a=="string"){a.charAt(0)!=="<"||a.charAt(a.length-1)!==">"||a.length<3?g=i.exec(a):g=[null,a,null];if(g&&(g[1]||!d)){if(g[1]){d=d instanceof e?d[0]:d,k=d?d.ownerDocument||d:c,j=m.exec(a),j?e.isPlainObject(d)?(a=[c.createElement(j[1])],e.fn.attr.call(a,d,!0)):a=[k.createElement(j[1])]:(j=e.buildFragment([g[1]],[k]),a=(j.cacheable?e.clone(j.fragment):j.fragment).childNodes);return e.merge(this,a)}h=c.getElementById(g[2]);if(h&&h.parentNode){if(h.id!==g[2])return f.find(a);this.length=1,this[0]=h}this.context=c,this.selector=a;return this}return!d||d.jquery?(d||f).find(a):this.constructor(d).find(a)}if(e.isFunction(a))return f.ready(a);a.selector!==b&&(this.selector=a.selector,this.context=a.context);return e.makeArray(a,this)},selector:"",jquery:"1.7.1",length:0,size:function(){return this.length},toArray:function(){return F.call(this,0)},get:function(a){return a==null?this.toArray():a<0?this[this.length+a]:this[a]},pushStack:function(a,b,c){var 
d=this.constructor();e.isArray(a)?E.apply(d,a):e.merge(d,a),d.prevObject=this,d.context=this.context,b==="find"?d.selector=this.selector+(this.selector?" ":"")+c:b&&(d.selector=this.selector+"."+b+"("+c+")");return d},each:function(a,b){return e.each(this,a,b)},ready:function(a){e.bindReady(),A.add(a);return this},eq:function(a){a=+a;return a===-1?this.slice(a):this.slice(a,a+1)},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},slice:function(){return this.pushStack(F.apply(this,arguments),"slice",F.call(arguments).join(","))},map:function(a){return this.pushStack(e.map(this,function(b,c){return a.call(b,c,b)}))},end:function(){return this.prevObject||this.constructor(null)},push:E,sort:[].sort,splice:[].splice},e.fn.init.prototype=e.fn,e.extend=e.fn.extend=function(){var a,c,d,f,g,h,i=arguments[0]||{},j=1,k=arguments.length,l=!1;typeof i=="boolean"&&(l=i,i=arguments[1]||{},j=2),typeof i!="object"&&!e.isFunction(i)&&(i={}),k===j&&(i=this,--j);for(;j0)return;A.fireWith(c,[e]),e.fn.trigger&&e(c).trigger("ready").off("ready")}},bindReady:function(){if(!A){A=e.Callbacks("once memory");if(c.readyState==="complete")return setTimeout(e.ready,1);if(c.addEventListener)c.addEventListener("DOMContentLoaded",B,!1),a.addEventListener("load",e.ready,!1);else if(c.attachEvent){c.attachEvent("onreadystatechange",B),a.attachEvent("onload",e.ready);var b=!1;try{b=a.frameElement==null}catch(d){}c.documentElement.doScroll&&b&&J()}}},isFunction:function(a){return e.type(a)==="function"},isArray:Array.isArray||function(a){return e.type(a)==="array"},isWindow:function(a){return a&&typeof a=="object"&&"setInterval"in a},isNumeric:function(a){return!isNaN(parseFloat(a))&&isFinite(a)},type:function(a){return a==null?String(a):I[C.call(a)]||"object"},isPlainObject:function(a){if(!a||e.type(a)!=="object"||a.nodeType||e.isWindow(a))return!1;try{if(a.constructor&&!D.call(a,"constructor")&&!D.call(a.constructor.prototype,"isPrototypeOf"))return!1}catch(c){return!1}var 
d;for(d in a);return d===b||D.call(a,d)},isEmptyObject:function(a){for(var b in a)return!1;return!0},error:function(a){throw new Error(a)},parseJSON:function(b){if(typeof b!="string"||!b)return null;b=e.trim(b);if(a.JSON&&a.JSON.parse)return a.JSON.parse(b);if(n.test(b.replace(o,"@").replace(p,"]").replace(q,"")))return(new Function("return "+b))();e.error("Invalid JSON: "+b)},parseXML:function(c){var d,f;try{a.DOMParser?(f=new DOMParser,d=f.parseFromString(c,"text/xml")):(d=new ActiveXObject("Microsoft.XMLDOM"),d.async="false",d.loadXML(c))}catch(g){d=b}(!d||!d.documentElement||d.getElementsByTagName("parsererror").length)&&e.error("Invalid XML: "+c);return d},noop:function(){},globalEval:function(b){b&&j.test(b)&&(a.execScript||function(b){a.eval.call(a,b)})(b)},camelCase:function(a){return a.replace(w,"ms-").replace(v,x)},nodeName:function(a,b){return a.nodeName&&a.nodeName.toUpperCase()===b.toUpperCase()},each:function(a,c,d){var f,g=0,h=a.length,i=h===b||e.isFunction(a);if(d){if(i){for(f in a)if(c.apply(a[f],d)===!1)break}else for(;g0&&a[0]&&a[j-1]||j===0||e.isArray(a));if(k)for(;i1?i.call(arguments,0):b,j.notifyWith(k,e)}}function l(a){return function(c){b[a]=arguments.length>1?i.call(arguments,0):c,--g||j.resolveWith(j,b)}}var b=i.call(arguments,0),c=0,d=b.length,e=Array(d),g=d,h=d,j=d<=1&&a&&f.isFunction(a.promise)?a:f.Deferred(),k=j.promise();if(d>1){for(;c
a",d=q.getElementsByTagName("*"),e=q.getElementsByTagName("a")[0];if(!d||!d.length||!e)return{};g=c.createElement("select"),h=g.appendChild(c.createElement("option")),i=q.getElementsByTagName("input")[0],b={leadingWhitespace:q.firstChild.nodeType===3,tbody:!q.getElementsByTagName("tbody").length,htmlSerialize:!!q.getElementsByTagName("link").length,style:/top/.test(e.getAttribute("style")),hrefNormalized:e.getAttribute("href")==="/a",opacity:/^0.55/.test(e.style.opacity),cssFloat:!!e.style.cssFloat,checkOn:i.value==="on",optSelected:h.selected,getSetAttribute:q.className!=="t",enctype:!!c.createElement("form").enctype,html5Clone:c.createElement("nav").cloneNode(!0).outerHTML!=="<:nav>",submitBubbles:!0,changeBubbles:!0,focusinBubbles:!1,deleteExpando:!0,noCloneEvent:!0,inlineBlockNeedsLayout:!1,shrinkWrapBlocks:!1,reliableMarginRight:!0},i.checked=!0,b.noCloneChecked=i.cloneNode(!0).checked,g.disabled=!0,b.optDisabled=!h.disabled;try{delete q.test}catch(s){b.deleteExpando=!1}!q.addEventListener&&q.attachEvent&&q.fireEvent&&(q.attachEvent("onclick",function(){b.noCloneEvent=!1}),q.cloneNode(!0).fireEvent("onclick")),i=c.createElement("input"),i.value="t",i.setAttribute("type","radio"),b.radioValue=i.value==="t",i.setAttribute("checked","checked"),q.appendChild(i),k=c.createDocumentFragment(),k.appendChild(q.lastChild),b.checkClone=k.cloneNode(!0).cloneNode(!0).lastChild.checked,b.appendChecked=i.checked,k.removeChild(i),k.appendChild(q),q.innerHTML="",a.getComputedStyle&&(j=c.createElement("div"),j.style.width="0",j.style.marginRight="0",q.style.width="2px",q.appendChild(j),b.reliableMarginRight=(parseInt((a.getComputedStyle(j,null)||{marginRight:0}).marginRight,10)||0)===0);if(q.attachEvent)for(o in{submit:1,change:1,focusin:1})n="on"+o,p=n in q,p||(q.setAttribute(n,"return;"),p=typeof q[n]=="function"),b[o+"Bubbles"]=p;k.removeChild(q),k=g=h=j=q=i=null,f(function(){var 
a,d,e,g,h,i,j,k,m,n,o,r=c.getElementsByTagName("body")[0];!r||(j=1,k="position:absolute;top:0;left:0;width:1px;height:1px;margin:0;",m="visibility:hidden;border:0;",n="style='"+k+"border:5px solid #000;padding:0;'",o="
"+""+"
",a=c.createElement("div"),a.style.cssText=m+"width:0;height:0;position:static;top:0;margin-top:"+j+"px",r.insertBefore(a,r.firstChild),q=c.createElement("div"),a.appendChild(q),q.innerHTML="
t
",l=q.getElementsByTagName("td"),p=l[0].offsetHeight===0,l[0].style.display="",l[1].style.display="none",b.reliableHiddenOffsets=p&&l[0].offsetHeight===0,q.innerHTML="",q.style.width=q.style.paddingLeft="1px",f.boxModel=b.boxModel=q.offsetWidth===2,typeof q.style.zoom!="undefined"&&(q.style.display="inline",q.style.zoom=1,b.inlineBlockNeedsLayout=q.offsetWidth===2,q.style.display="",q.innerHTML="
",b.shrinkWrapBlocks=q.offsetWidth!==2),q.style.cssText=k+m,q.innerHTML=o,d=q.firstChild,e=d.firstChild,h=d.nextSibling.firstChild.firstChild,i={doesNotAddBorder:e.offsetTop!==5,doesAddBorderForTableAndCells:h.offsetTop===5},e.style.position="fixed",e.style.top="20px",i.fixedPosition=e.offsetTop===20||e.offsetTop===15,e.style.position=e.style.top="",d.style.overflow="hidden",d.style.position="relative",i.subtractsBorderForOverflowNotVisible=e.offsetTop===-5,i.doesNotIncludeMarginInBodyOffset=r.offsetTop!==j,r.removeChild(a),q=a=null,f.extend(b,i))});return b}();var j=/^(?:\{.*\}|\[.*\])$/,k=/([A-Z])/g;f.extend({cache:{},uuid:0,expando:"jQuery"+(f.fn.jquery+Math.random()).replace(/\D/g,""),noData:{embed:!0,object:"clsid:D27CDB6E-AE6D-11cf-96B8-444553540000",applet:!0},hasData:function(a){a=a.nodeType?f.cache[a[f.expando]]:a[f.expando];return!!a&&!m(a)},data:function(a,c,d,e){if(!!f.acceptData(a)){var g,h,i,j=f.expando,k=typeof c=="string",l=a.nodeType,m=l?f.cache:a,n=l?a[j]:a[j]&&j,o=c==="events";if((!n||!m[n]||!o&&!e&&!m[n].data)&&k&&d===b)return;n||(l?a[j]=n=++f.uuid:n=j),m[n]||(m[n]={},l||(m[n].toJSON=f.noop));if(typeof c=="object"||typeof c=="function")e?m[n]=f.extend(m[n],c):m[n].data=f.extend(m[n].data,c);g=h=m[n],e||(h.data||(h.data={}),h=h.data),d!==b&&(h[f.camelCase(c)]=d);if(o&&!h[c])return g.events;k?(i=h[c],i==null&&(i=h[f.camelCase(c)])):i=h;return i}},removeData:function(a,b,c){if(!!f.acceptData(a)){var d,e,g,h=f.expando,i=a.nodeType,j=i?f.cache:a,k=i?a[h]:h;if(!j[k])return;if(b){d=c?j[k]:j[k].data;if(d){f.isArray(b)||(b in d?b=[b]:(b=f.camelCase(b),b in d?b=[b]:b=b.split(" ")));for(e=0,g=b.length;e-1)return!0;return!1},val:function(a){var c,d,e,g=this[0];{if(!!arguments.length){e=f.isFunction(a);return this.each(function(d){var g=f(this),h;if(this.nodeType===1){e?h=a.call(this,d,g.val()):h=a,h==null?h="":typeof h=="number"?h+="":f.isArray(h)&&(h=f.map(h,function(a){return 
a==null?"":a+""})),c=f.valHooks[this.nodeName.toLowerCase()]||f.valHooks[this.type];if(!c||!("set"in c)||c.set(this,h,"value")===b)this.value=h}})}if(g){c=f.valHooks[g.nodeName.toLowerCase()]||f.valHooks[g.type];if(c&&"get"in c&&(d=c.get(g,"value"))!==b)return d;d=g.value;return typeof d=="string"?d.replace(q,""):d==null?"":d}}}}),f.extend({valHooks:{option:{get:function(a){var b=a.attributes.value;return!b||b.specified?a.value:a.text}},select:{get:function(a){var b,c,d,e,g=a.selectedIndex,h=[],i=a.options,j=a.type==="select-one";if(g<0)return null;c=j?g:0,d=j?g+1:i.length;for(;c=0}),c.length||(a.selectedIndex=-1);return c}}},attrFn:{val:!0,css:!0,html:!0,text:!0,data:!0,width:!0,height:!0,offset:!0},attr:function(a,c,d,e){var g,h,i,j=a.nodeType;if(!!a&&j!==3&&j!==8&&j!==2){if(e&&c in f.attrFn)return f(a)[c](d);if(typeof a.getAttribute=="undefined")return f.prop(a,c,d);i=j!==1||!f.isXMLDoc(a),i&&(c=c.toLowerCase(),h=f.attrHooks[c]||(u.test(c)?x:w));if(d!==b){if(d===null){f.removeAttr(a,c);return}if(h&&"set"in h&&i&&(g=h.set(a,d,c))!==b)return g;a.setAttribute(c,""+d);return d}if(h&&"get"in h&&i&&(g=h.get(a,c))!==null)return g;g=a.getAttribute(c);return g===null?b:g}},removeAttr:function(a,b){var c,d,e,g,h=0;if(b&&a.nodeType===1){d=b.toLowerCase().split(p),g=d.length;for(;h=0}})});var z=/^(?:textarea|input|select)$/i,A=/^([^\.]*)?(?:\.(.+))?$/,B=/\bhover(\.\S+)?\b/,C=/^key/,D=/^(?:mouse|contextmenu)|click/,E=/^(?:focusinfocus|focusoutblur)$/,F=/^(\w*)(?:#([\w\-]+))?(?:\.([\w\-]+))?$/,G=function(a){var b=F.exec(a);b&&(b[1]=(b[1]||"").toLowerCase(),b[3]=b[3]&&new RegExp("(?:^|\\s)"+b[3]+"(?:\\s|$)"));return b},H=function(a,b){var c=a.attributes||{};return(!b[1]||a.nodeName.toLowerCase()===b[1])&&(!b[2]||(c.id||{}).value===b[2])&&(!b[3]||b[3].test((c["class"]||{}).value))},I=function(a){return f.event.special.hover?a:a.replace(B,"mouseenter$1 mouseleave$1")}; +f.event={add:function(a,c,d,e,g){var 
h,i,j,k,l,m,n,o,p,q,r,s;if(!(a.nodeType===3||a.nodeType===8||!c||!d||!(h=f._data(a)))){d.handler&&(p=d,d=p.handler),d.guid||(d.guid=f.guid++),j=h.events,j||(h.events=j={}),i=h.handle,i||(h.handle=i=function(a){return typeof f!="undefined"&&(!a||f.event.triggered!==a.type)?f.event.dispatch.apply(i.elem,arguments):b},i.elem=a),c=f.trim(I(c)).split(" ");for(k=0;k=0&&(h=h.slice(0,-1),k=!0),h.indexOf(".")>=0&&(i=h.split("."),h=i.shift(),i.sort());if((!e||f.event.customEvent[h])&&!f.event.global[h])return;c=typeof c=="object"?c[f.expando]?c:new f.Event(h,c):new f.Event(h),c.type=h,c.isTrigger=!0,c.exclusive=k,c.namespace=i.join("."),c.namespace_re=c.namespace?new RegExp("(^|\\.)"+i.join("\\.(?:.*\\.)?")+"(\\.|$)"):null,o=h.indexOf(":")<0?"on"+h:"";if(!e){j=f.cache;for(l in j)j[l].events&&j[l].events[h]&&f.event.trigger(c,d,j[l].handle.elem,!0);return}c.result=b,c.target||(c.target=e),d=d!=null?f.makeArray(d):[],d.unshift(c),p=f.event.special[h]||{};if(p.trigger&&p.trigger.apply(e,d)===!1)return;r=[[e,p.bindType||h]];if(!g&&!p.noBubble&&!f.isWindow(e)){s=p.delegateType||h,m=E.test(s+h)?e:e.parentNode,n=null;for(;m;m=m.parentNode)r.push([m,s]),n=m;n&&n===e.ownerDocument&&r.push([n.defaultView||n.parentWindow||a,s])}for(l=0;le&&i.push({elem:this,matches:d.slice(e)});for(j=0;j0?this.on(b,null,a,c):this.trigger(b)},f.attrFn&&(f.attrFn[b]=!0),C.test(b)&&(f.event.fixHooks[b]=f.event.keyHooks),D.test(b)&&(f.event.fixHooks[b]=f.event.mouseHooks)}),function(){function x(a,b,c,e,f,g){for(var h=0,i=e.length;h0){k=j;break}}j=j[a]}e[h]=k}}}function w(a,b,c,e,f,g){for(var h=0,i=e.length;h+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g,d="sizcache"+(Math.random()+"").replace(".",""),e=0,g=Object.prototype.toString,h=!1,i=!0,j=/\\/g,k=/\r\n/g,l=/\W/;[0,0].sort(function(){i=!1;return 0});var m=function(b,d,e,f){e=e||[],d=d||c;var h=d;if(d.nodeType!==1&&d.nodeType!==9)return[];if(!b||typeof b!="string")return e;var 
i,j,k,l,n,q,r,t,u=!0,v=m.isXML(d),w=[],x=b;do{a.exec(""),i=a.exec(x);if(i){x=i[3],w.push(i[1]);if(i[2]){l=i[3];break}}}while(i);if(w.length>1&&p.exec(b))if(w.length===2&&o.relative[w[0]])j=y(w[0]+w[1],d,f);else{j=o.relative[w[0]]?[d]:m(w.shift(),d);while(w.length)b=w.shift(),o.relative[b]&&(b+=w.shift()),j=y(b,j,f)}else{!f&&w.length>1&&d.nodeType===9&&!v&&o.match.ID.test(w[0])&&!o.match.ID.test(w[w.length-1])&&(n=m.find(w.shift(),d,v),d=n.expr?m.filter(n.expr,n.set)[0]:n.set[0]);if(d){n=f?{expr:w.pop(),set:s(f)}:m.find(w.pop(),w.length===1&&(w[0]==="~"||w[0]==="+")&&d.parentNode?d.parentNode:d,v),j=n.expr?m.filter(n.expr,n.set):n.set,w.length>0?k=s(j):u=!1;while(w.length)q=w.pop(),r=q,o.relative[q]?r=w.pop():q="",r==null&&(r=d),o.relative[q](k,r,v)}else k=w=[]}k||(k=j),k||m.error(q||b);if(g.call(k)==="[object Array]")if(!u)e.push.apply(e,k);else if(d&&d.nodeType===1)for(t=0;k[t]!=null;t++)k[t]&&(k[t]===!0||k[t].nodeType===1&&m.contains(d,k[t]))&&e.push(j[t]);else for(t=0;k[t]!=null;t++)k[t]&&k[t].nodeType===1&&e.push(j[t]);else s(k,e);l&&(m(l,h,e,f),m.uniqueSort(e));return e};m.uniqueSort=function(a){if(u){h=i,a.sort(u);if(h)for(var b=1;b0},m.find=function(a,b,c){var d,e,f,g,h,i;if(!a)return[];for(e=0,f=o.order.length;e":function(a,b){var c,d=typeof b=="string",e=0,f=a.length;if(d&&!l.test(b)){b=b.toLowerCase();for(;e=0)?c||d.push(h):c&&(b[g]=!1));return!1},ID:function(a){return a[1].replace(j,"")},TAG:function(a,b){return a[1].replace(j,"").toLowerCase()},CHILD:function(a){if(a[1]==="nth"){a[2]||m.error(a[0]),a[2]=a[2].replace(/^\+|\s*/g,"");var b=/(-?)(\d*)(?:n([+\-]?\d*))?/.exec(a[2]==="even"&&"2n"||a[2]==="odd"&&"2n+1"||!/\D/.test(a[2])&&"0n+"+a[2]||a[2]);a[2]=b[1]+(b[2]||1)-0,a[3]=b[3]-0}else a[2]&&m.error(a[0]);a[0]=e++;return a},ATTR:function(a,b,c,d,e,f){var g=a[1]=a[1].replace(j,"");!f&&o.attrMap[g]&&(a[1]=o.attrMap[g]),a[4]=(a[4]||a[5]||"").replace(j,""),a[2]==="~="&&(a[4]=" "+a[4]+" ");return 
a},PSEUDO:function(b,c,d,e,f){if(b[1]==="not")if((a.exec(b[3])||"").length>1||/^\w/.test(b[3]))b[3]=m(b[3],null,null,c);else{var g=m.filter(b[3],c,d,!0^f);d||e.push.apply(e,g);return!1}else if(o.match.POS.test(b[0])||o.match.CHILD.test(b[0]))return!0;return b},POS:function(a){a.unshift(!0);return a}},filters:{enabled:function(a){return a.disabled===!1&&a.type!=="hidden"},disabled:function(a){return a.disabled===!0},checked:function(a){return a.checked===!0},selected:function(a){a.parentNode&&a.parentNode.selectedIndex;return a.selected===!0},parent:function(a){return!!a.firstChild},empty:function(a){return!a.firstChild},has:function(a,b,c){return!!m(c[3],a).length},header:function(a){return/h\d/i.test(a.nodeName)},text:function(a){var b=a.getAttribute("type"),c=a.type;return a.nodeName.toLowerCase()==="input"&&"text"===c&&(b===c||b===null)},radio:function(a){return a.nodeName.toLowerCase()==="input"&&"radio"===a.type},checkbox:function(a){return a.nodeName.toLowerCase()==="input"&&"checkbox"===a.type},file:function(a){return a.nodeName.toLowerCase()==="input"&&"file"===a.type},password:function(a){return a.nodeName.toLowerCase()==="input"&&"password"===a.type},submit:function(a){var b=a.nodeName.toLowerCase();return(b==="input"||b==="button")&&"submit"===a.type},image:function(a){return a.nodeName.toLowerCase()==="input"&&"image"===a.type},reset:function(a){var b=a.nodeName.toLowerCase();return(b==="input"||b==="button")&&"reset"===a.type},button:function(a){var b=a.nodeName.toLowerCase();return b==="input"&&"button"===a.type||b==="button"},input:function(a){return/input|select|textarea|button/i.test(a.nodeName)},focus:function(a){return a===a.ownerDocument.activeElement}},setFilters:{first:function(a,b){return b===0},last:function(a,b,c,d){return b===d.length-1},even:function(a,b){return b%2===0},odd:function(a,b){return b%2===1},lt:function(a,b,c){return bc[3]-0},nth:function(a,b,c){return c[3]-0===b},eq:function(a,b,c){return 
c[3]-0===b}},filter:{PSEUDO:function(a,b,c,d){var e=b[1],f=o.filters[e];if(f)return f(a,c,b,d);if(e==="contains")return(a.textContent||a.innerText||n([a])||"").indexOf(b[3])>=0;if(e==="not"){var g=b[3];for(var h=0,i=g.length;h=0}},ID:function(a,b){return a.nodeType===1&&a.getAttribute("id")===b},TAG:function(a,b){return b==="*"&&a.nodeType===1||!!a.nodeName&&a.nodeName.toLowerCase()===b},CLASS:function(a,b){return(" "+(a.className||a.getAttribute("class"))+" ").indexOf(b)>-1},ATTR:function(a,b){var c=b[1],d=m.attr?m.attr(a,c):o.attrHandle[c]?o.attrHandle[c](a):a[c]!=null?a[c]:a.getAttribute(c),e=d+"",f=b[2],g=b[4];return d==null?f==="!=":!f&&m.attr?d!=null:f==="="?e===g:f==="*="?e.indexOf(g)>=0:f==="~="?(" "+e+" ").indexOf(g)>=0:g?f==="!="?e!==g:f==="^="?e.indexOf(g)===0:f==="$="?e.substr(e.length-g.length)===g:f==="|="?e===g||e.substr(0,g.length+1)===g+"-":!1:e&&d!==!1},POS:function(a,b,c,d){var e=b[2],f=o.setFilters[e];if(f)return f(a,c,b,d)}}},p=o.match.POS,q=function(a,b){return"\\"+(b-0+1)};for(var r in o.match)o.match[r]=new RegExp(o.match[r].source+/(?![^\[]*\])(?![^\(]*\))/.source),o.leftMatch[r]=new RegExp(/(^(?:.|\r|\n)*?)/.source+o.match[r].source.replace(/\\(\d+)/g,q));var s=function(a,b){a=Array.prototype.slice.call(a,0);if(b){b.push.apply(b,a);return b}return a};try{Array.prototype.slice.call(c.documentElement.childNodes,0)[0].nodeType}catch(t){s=function(a,b){var c=0,d=b||[];if(g.call(a)==="[object Array]")Array.prototype.push.apply(d,a);else if(typeof a.length=="number")for(var e=a.length;c",e.insertBefore(a,e.firstChild),c.getElementById(d)&&(o.find.ID=function(a,c,d){if(typeof c.getElementById!="undefined"&&!d){var e=c.getElementById(a[1]);return e?e.id===a[1]||typeof e.getAttributeNode!="undefined"&&e.getAttributeNode("id").nodeValue===a[1]?[e]:b:[]}},o.filter.ID=function(a,b){var c=typeof a.getAttributeNode!="undefined"&&a.getAttributeNode("id");return a.nodeType===1&&c&&c.nodeValue===b}),e.removeChild(a),e=a=null}(),function(){var 
a=c.createElement("div");a.appendChild(c.createComment("")),a.getElementsByTagName("*").length>0&&(o.find.TAG=function(a,b){var c=b.getElementsByTagName(a[1]);if(a[1]==="*"){var d=[];for(var e=0;c[e];e++)c[e].nodeType===1&&d.push(c[e]);c=d}return c}),a.innerHTML="",a.firstChild&&typeof a.firstChild.getAttribute!="undefined"&&a.firstChild.getAttribute("href")!=="#"&&(o.attrHandle.href=function(a){return a.getAttribute("href",2)}),a=null}(),c.querySelectorAll&&function(){var a=m,b=c.createElement("div"),d="__sizzle__";b.innerHTML="

";if(!b.querySelectorAll||b.querySelectorAll(".TEST").length!==0){m=function(b,e,f,g){e=e||c;if(!g&&!m.isXML(e)){var h=/^(\w+$)|^\.([\w\-]+$)|^#([\w\-]+$)/.exec(b);if(h&&(e.nodeType===1||e.nodeType===9)){if(h[1])return s(e.getElementsByTagName(b),f);if(h[2]&&o.find.CLASS&&e.getElementsByClassName)return s(e.getElementsByClassName(h[2]),f)}if(e.nodeType===9){if(b==="body"&&e.body)return s([e.body],f);if(h&&h[3]){var i=e.getElementById(h[3]);if(!i||!i.parentNode)return s([],f);if(i.id===h[3])return s([i],f)}try{return s(e.querySelectorAll(b),f)}catch(j){}}else if(e.nodeType===1&&e.nodeName.toLowerCase()!=="object"){var k=e,l=e.getAttribute("id"),n=l||d,p=e.parentNode,q=/^\s*[+~]/.test(b);l?n=n.replace(/'/g,"\\$&"):e.setAttribute("id",n),q&&p&&(e=e.parentNode);try{if(!q||p)return s(e.querySelectorAll("[id='"+n+"'] "+b),f)}catch(r){}finally{l||k.removeAttribute("id")}}}return a(b,e,f,g)};for(var e in a)m[e]=a[e];b=null}}(),function(){var a=c.documentElement,b=a.matchesSelector||a.mozMatchesSelector||a.webkitMatchesSelector||a.msMatchesSelector;if(b){var d=!b.call(c.createElement("div"),"div"),e=!1;try{b.call(c.documentElement,"[test!='']:sizzle")}catch(f){e=!0}m.matchesSelector=function(a,c){c=c.replace(/\=\s*([^'"\]]*)\s*\]/g,"='$1']");if(!m.isXML(a))try{if(e||!o.match.PSEUDO.test(c)&&!/!=/.test(c)){var f=b.call(a,c);if(f||!d||a.document&&a.document.nodeType!==11)return f}}catch(g){}return m(c,null,null,[a]).length>0}}}(),function(){var a=c.createElement("div");a.innerHTML="
";if(!!a.getElementsByClassName&&a.getElementsByClassName("e").length!==0){a.lastChild.className="e";if(a.getElementsByClassName("e").length===1)return;o.order.splice(1,0,"CLASS"),o.find.CLASS=function(a,b,c){if(typeof b.getElementsByClassName!="undefined"&&!c)return b.getElementsByClassName(a[1])},a=null}}(),c.documentElement.contains?m.contains=function(a,b){return a!==b&&(a.contains?a.contains(b):!0)}:c.documentElement.compareDocumentPosition?m.contains=function(a,b){return!!(a.compareDocumentPosition(b)&16)}:m.contains=function(){return!1},m.isXML=function(a){var b=(a?a.ownerDocument||a:0).documentElement;return b?b.nodeName!=="HTML":!1};var y=function(a,b,c){var d,e=[],f="",g=b.nodeType?[b]:b;while(d=o.match.PSEUDO.exec(a))f+=d[0],a=a.replace(o.match.PSEUDO,"");a=o.relative[a]?a+"*":a;for(var h=0,i=g.length;h0)for(h=g;h=0:f.filter(a,this).length>0:this.filter(a).length>0)},closest:function(a,b){var c=[],d,e,g=this[0];if(f.isArray(a)){var h=1;while(g&&g.ownerDocument&&g!==b){for(d=0;d-1:f.find.matchesSelector(g,a)){c.push(g);break}g=g.parentNode;if(!g||!g.ownerDocument||g===b||g.nodeType===11)break}}c=c.length>1?f.unique(c):c;return this.pushStack(c,"closest",a)},index:function(a){if(!a)return this[0]&&this[0].parentNode?this.prevAll().length:-1;if(typeof a=="string")return f.inArray(this[0],f(a));return f.inArray(a.jquery?a[0]:a,this)},add:function(a,b){var c=typeof a=="string"?f(a,b):f.makeArray(a&&a.nodeType?[a]:a),d=f.merge(this.get(),c);return this.pushStack(S(c[0])||S(d[0])?d:f.unique(d))},andSelf:function(){return this.add(this.prevObject)}}),f.each({parent:function(a){var b=a.parentNode;return b&&b.nodeType!==11?b:null},parents:function(a){return f.dir(a,"parentNode")},parentsUntil:function(a,b,c){return f.dir(a,"parentNode",c)},next:function(a){return f.nth(a,2,"nextSibling")},prev:function(a){return f.nth(a,2,"previousSibling")},nextAll:function(a){return f.dir(a,"nextSibling")},prevAll:function(a){return 
f.dir(a,"previousSibling")},nextUntil:function(a,b,c){return f.dir(a,"nextSibling",c)},prevUntil:function(a,b,c){return f.dir(a,"previousSibling",c)},siblings:function(a){return f.sibling(a.parentNode.firstChild,a)},children:function(a){return f.sibling(a.firstChild)},contents:function(a){return f.nodeName(a,"iframe")?a.contentDocument||a.contentWindow.document:f.makeArray(a.childNodes)}},function(a,b){f.fn[a]=function(c,d){var e=f.map(this,b,c);L.test(a)||(d=c),d&&typeof d=="string"&&(e=f.filter(d,e)),e=this.length>1&&!R[a]?f.unique(e):e,(this.length>1||N.test(d))&&M.test(a)&&(e=e.reverse());return this.pushStack(e,a,P.call(arguments).join(","))}}),f.extend({filter:function(a,b,c){c&&(a=":not("+a+")");return b.length===1?f.find.matchesSelector(b[0],a)?[b[0]]:[]:f.find.matches(a,b)},dir:function(a,c,d){var e=[],g=a[c];while(g&&g.nodeType!==9&&(d===b||g.nodeType!==1||!f(g).is(d)))g.nodeType===1&&e.push(g),g=g[c];return e},nth:function(a,b,c,d){b=b||1;var e=0;for(;a;a=a[c])if(a.nodeType===1&&++e===b)break;return a},sibling:function(a,b){var c=[];for(;a;a=a.nextSibling)a.nodeType===1&&a!==b&&c.push(a);return c}});var V="abbr|article|aside|audio|canvas|datalist|details|figcaption|figure|footer|header|hgroup|mark|meter|nav|output|progress|section|summary|time|video",W=/ jQuery\d+="(?:\d+|null)"/g,X=/^\s+/,Y=/<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/ig,Z=/<([\w:]+)/,$=/",""],legend:[1,"
","
"],thead:[1,"","
"],tr:[2,"","
"],td:[3,"","
"],col:[2,"","
"],area:[1,"",""],_default:[0,"",""]},bh=U(c);bg.optgroup=bg.option,bg.tbody=bg.tfoot=bg.colgroup=bg.caption=bg.thead,bg.th=bg.td,f.support.htmlSerialize||(bg._default=[1,"div
","
"]),f.fn.extend({text:function(a){if(f.isFunction(a))return this.each(function(b){var c=f(this);c.text(a.call(this,b,c.text()))});if(typeof a!="object"&&a!==b)return this.empty().append((this[0]&&this[0].ownerDocument||c).createTextNode(a));return f.text(this)},wrapAll:function(a){if(f.isFunction(a))return this.each(function(b){f(this).wrapAll(a.call(this,b))});if(this[0]){var b=f(a,this[0].ownerDocument).eq(0).clone(!0);this[0].parentNode&&b.insertBefore(this[0]),b.map(function(){var a=this;while(a.firstChild&&a.firstChild.nodeType===1)a=a.firstChild;return a}).append(this)}return this},wrapInner:function(a){if(f.isFunction(a))return this.each(function(b){f(this).wrapInner(a.call(this,b))});return this.each(function(){var b=f(this),c=b.contents();c.length?c.wrapAll(a):b.append(a)})},wrap:function(a){var b=f.isFunction(a);return this.each(function(c){f(this).wrapAll(b?a.call(this,c):a)})},unwrap:function(){return this.parent().each(function(){f.nodeName(this,"body")||f(this).replaceWith(this.childNodes)}).end()},append:function(){return this.domManip(arguments,!0,function(a){this.nodeType===1&&this.appendChild(a)})},prepend:function(){return this.domManip(arguments,!0,function(a){this.nodeType===1&&this.insertBefore(a,this.firstChild)})},before:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,!1,function(a){this.parentNode.insertBefore(a,this)});if(arguments.length){var a=f.clean(arguments);a.push.apply(a,this.toArray());return this.pushStack(a,"before",arguments)}},after:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,!1,function(a){this.parentNode.insertBefore(a,this.nextSibling)});if(arguments.length){var a=this.pushStack(this,"after",arguments);a.push.apply(a,f.clean(arguments));return a}},remove:function(a,b){for(var c=0,d;(d=this[c])!=null;c++)if(!a||f.filter(a,[d]).length)!b&&d.nodeType===1&&(f.cleanData(d.getElementsByTagName("*")), +f.cleanData([d])),d.parentNode&&d.parentNode.removeChild(d);return 
this},empty:function() +{for(var a=0,b;(b=this[a])!=null;a++){b.nodeType===1&&f.cleanData(b.getElementsByTagName("*"));while(b.firstChild)b.removeChild(b.firstChild)}return this},clone:function(a,b){a=a==null?!1:a,b=b==null?a:b;return this.map(function(){return f.clone(this,a,b)})},html:function(a){if(a===b)return this[0]&&this[0].nodeType===1?this[0].innerHTML.replace(W,""):null;if(typeof a=="string"&&!ba.test(a)&&(f.support.leadingWhitespace||!X.test(a))&&!bg[(Z.exec(a)||["",""])[1].toLowerCase()]){a=a.replace(Y,"<$1>");try{for(var c=0,d=this.length;c1&&l0?this.clone(!0):this).get();f(e[h])[b](j),d=d.concat(j)}return this.pushStack(d,a,e.selector)}}),f.extend({clone:function(a,b,c){var d,e,g,h=f.support.html5Clone||!bc.test("<"+a.nodeName)?a.cloneNode(!0):bo(a);if((!f.support.noCloneEvent||!f.support.noCloneChecked)&&(a.nodeType===1||a.nodeType===11)&&!f.isXMLDoc(a)){bk(a,h),d=bl(a),e=bl(h);for(g=0;d[g];++g)e[g]&&bk(d[g],e[g])}if(b){bj(a,h);if(c){d=bl(a),e=bl(h);for(g=0;d[g];++g)bj(d[g],e[g])}}d=e=null;return h},clean:function(a,b,d,e){var g;b=b||c,typeof b.createElement=="undefined"&&(b=b.ownerDocument||b[0]&&b[0].ownerDocument||c);var h=[],i;for(var j=0,k;(k=a[j])!=null;j++){typeof k=="number"&&(k+="");if(!k)continue;if(typeof k=="string")if(!_.test(k))k=b.createTextNode(k);else{k=k.replace(Y,"<$1>");var l=(Z.exec(k)||["",""])[1].toLowerCase(),m=bg[l]||bg._default,n=m[0],o=b.createElement("div");b===c?bh.appendChild(o):U(b).appendChild(o),o.innerHTML=m[1]+k+m[2];while(n--)o=o.lastChild;if(!f.support.tbody){var p=$.test(k),q=l==="table"&&!p?o.firstChild&&o.firstChild.childNodes:m[1]===""&&!p?o.childNodes:[];for(i=q.length-1;i>=0;--i)f.nodeName(q[i],"tbody")&&!q[i].childNodes.length&&q[i].parentNode.removeChild(q[i])}!f.support.leadingWhitespace&&X.test(k)&&o.insertBefore(b.createTextNode(X.exec(k)[0]),o.firstChild),k=o.childNodes}var r;if(!f.support.appendChecked)if(k[0]&&typeof (r=k.length)=="number")for(i=0;i=0)return 
b+"px"}}}),f.support.opacity||(f.cssHooks.opacity={get:function(a,b){return br.test((b&&a.currentStyle?a.currentStyle.filter:a.style.filter)||"")?parseFloat(RegExp.$1)/100+"":b?"1":""},set:function(a,b){var c=a.style,d=a.currentStyle,e=f.isNumeric(b)?"alpha(opacity="+b*100+")":"",g=d&&d.filter||c.filter||"";c.zoom=1;if(b>=1&&f.trim(g.replace(bq,""))===""){c.removeAttribute("filter");if(d&&!d.filter)return}c.filter=bq.test(g)?g.replace(bq,e):g+" "+e}}),f(function(){f.support.reliableMarginRight||(f.cssHooks.marginRight={get:function(a,b){var c;f.swap(a,{display:"inline-block"},function(){b?c=bz(a,"margin-right","marginRight"):c=a.style.marginRight});return c}})}),c.defaultView&&c.defaultView.getComputedStyle&&(bA=function(a,b){var c,d,e;b=b.replace(bs,"-$1").toLowerCase(),(d=a.ownerDocument.defaultView)&&(e=d.getComputedStyle(a,null))&&(c=e.getPropertyValue(b),c===""&&!f.contains(a.ownerDocument.documentElement,a)&&(c=f.style(a,b)));return c}),c.documentElement.currentStyle&&(bB=function(a,b){var c,d,e,f=a.currentStyle&&a.currentStyle[b],g=a.style;f===null&&g&&(e=g[b])&&(f=e),!bt.test(f)&&bu.test(f)&&(c=g.left,d=a.runtimeStyle&&a.runtimeStyle.left,d&&(a.runtimeStyle.left=a.currentStyle.left),g.left=b==="fontSize"?"1em":f||0,f=g.pixelLeft+"px",g.left=c,d&&(a.runtimeStyle.left=d));return f===""?"auto":f}),bz=bA||bB,f.expr&&f.expr.filters&&(f.expr.filters.hidden=function(a){var b=a.offsetWidth,c=a.offsetHeight;return b===0&&c===0||!f.support.reliableHiddenOffsets&&(a.style&&a.style.display||f.css(a,"display"))==="none"},f.expr.filters.visible=function(a){return!f.expr.filters.hidden(a)});var bD=/%20/g,bE=/\[\]$/,bF=/\r?\n/g,bG=/#.*$/,bH=/^(.*?):[ 
\t]*([^\r\n]*)\r?$/mg,bI=/^(?:color|date|datetime|datetime-local|email|hidden|month|number|password|range|search|tel|text|time|url|week)$/i,bJ=/^(?:about|app|app\-storage|.+\-extension|file|res|widget):$/,bK=/^(?:GET|HEAD)$/,bL=/^\/\//,bM=/\?/,bN=/)<[^<]*)*<\/script>/gi,bO=/^(?:select|textarea)/i,bP=/\s+/,bQ=/([?&])_=[^&]*/,bR=/^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+))?)?/,bS=f.fn.load,bT={},bU={},bV,bW,bX=["*/"]+["*"];try{bV=e.href}catch(bY){bV=c.createElement("a"),bV.href="",bV=bV.href}bW=bR.exec(bV.toLowerCase())||[],f.fn.extend({load:function(a,c,d){if(typeof a!="string"&&bS)return bS.apply(this,arguments);if(!this.length)return this;var e=a.indexOf(" ");if(e>=0){var g=a.slice(e,a.length);a=a.slice(0,e)}var h="GET";c&&(f.isFunction(c)?(d=c,c=b):typeof c=="object"&&(c=f.param(c,f.ajaxSettings.traditional),h="POST"));var i=this;f.ajax({url:a,type:h,dataType:"html",data:c,complete:function(a,b,c){c=a.responseText,a.isResolved()&&(a.done(function(a){c=a}),i.html(g?f("
").append(c.replace(bN,"")).find(g):c)),d&&i.each(d,[c,b,a])}});return this},serialize:function(){return f.param(this.serializeArray())},serializeArray:function(){return this.map(function(){return this.elements?f.makeArray(this.elements):this}).filter(function(){return this.name&&!this.disabled&&(this.checked||bO.test(this.nodeName)||bI.test(this.type))}).map(function(a,b){var c=f(this).val();return c==null?null:f.isArray(c)?f.map(c,function(a,c){return{name:b.name,value:a.replace(bF,"\r\n")}}):{name:b.name,value:c.replace(bF,"\r\n")}}).get()}}),f.each("ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split(" "),function(a,b){f.fn[b]=function(a){return this.on(b,a)}}),f.each(["get","post"],function(a,c){f[c]=function(a,d,e,g){f.isFunction(d)&&(g=g||e,e=d,d=b);return f.ajax({type:c,url:a,data:d,success:e,dataType:g})}}),f.extend({getScript:function(a,c){return f.get(a,b,c,"script")},getJSON:function(a,b,c){return f.get(a,b,c,"json")},ajaxSetup:function(a,b){b?b_(a,f.ajaxSettings):(b=a,a=f.ajaxSettings),b_(a,b);return a},ajaxSettings:{url:bV,isLocal:bJ.test(bW[1]),global:!0,type:"GET",contentType:"application/x-www-form-urlencoded",processData:!0,async:!0,accepts:{xml:"application/xml, text/xml",html:"text/html",text:"text/plain",json:"application/json, text/javascript","*":bX},contents:{xml:/xml/,html:/html/,json:/json/},responseFields:{xml:"responseXML",text:"responseText"},converters:{"* text":a.String,"text html":!0,"text json":f.parseJSON,"text xml":f.parseXML},flatOptions:{context:!0,url:!0}},ajaxPrefilter:bZ(bT),ajaxTransport:bZ(bU),ajax:function(a,c){function w(a,c,l,m){if(s!==2){s=2,q&&clearTimeout(q),p=b,n=m||"",v.readyState=a>0?4:0;var o,r,u,w=c,x=l?cb(d,v,l):b,y,z;if(a>=200&&a<300||a===304){if(d.ifModified){if(y=v.getResponseHeader("Last-Modified"))f.lastModified[k]=y;if(z=v.getResponseHeader("Etag"))f.etag[k]=z}if(a===304)w="notmodified",o=!0;else 
try{r=cc(d,x),w="success",o=!0}catch(A){w="parsererror",u=A}}else{u=w;if(!w||a)w="error",a<0&&(a=0)}v.status=a,v.statusText=""+(c||w),o?h.resolveWith(e,[r,w,v]):h.rejectWith(e,[v,w,u]),v.statusCode(j),j=b,t&&g.trigger("ajax"+(o?"Success":"Error"),[v,d,o?r:u]),i.fireWith(e,[v,w]),t&&(g.trigger("ajaxComplete",[v,d]),--f.active||f.event.trigger("ajaxStop"))}}typeof a=="object"&&(c=a,a=b),c=c||{};var d=f.ajaxSetup({},c),e=d.context||d,g=e!==d&&(e.nodeType||e instanceof f)?f(e):f.event,h=f.Deferred(),i=f.Callbacks("once memory"),j=d.statusCode||{},k,l={},m={},n,o,p,q,r,s=0,t,u,v={readyState:0,setRequestHeader:function(a,b){if(!s){var c=a.toLowerCase();a=m[c]=m[c]||a,l[a]=b}return this},getAllResponseHeaders:function(){return s===2?n:null},getResponseHeader:function(a){var c;if(s===2){if(!o){o={};while(c=bH.exec(n))o[c[1].toLowerCase()]=c[2]}c=o[a.toLowerCase()]}return c===b?null:c},overrideMimeType:function(a){s||(d.mimeType=a);return this},abort:function(a){a=a||"abort",p&&p.abort(a),w(0,a);return this}};h.promise(v),v.success=v.done,v.error=v.fail,v.complete=i.add,v.statusCode=function(a){if(a){var b;if(s<2)for(b in a)j[b]=[j[b],a[b]];else b=a[v.status],v.then(b,b)}return this},d.url=((a||d.url)+"").replace(bG,"").replace(bL,bW[1]+"//"),d.dataTypes=f.trim(d.dataType||"*").toLowerCase().split(bP),d.crossDomain==null&&(r=bR.exec(d.url.toLowerCase()),d.crossDomain=!(!r||r[1]==bW[1]&&r[2]==bW[2]&&(r[3]||(r[1]==="http:"?80:443))==(bW[3]||(bW[1]==="http:"?80:443)))),d.data&&d.processData&&typeof d.data!="string"&&(d.data=f.param(d.data,d.traditional)),b$(bT,d,c,v);if(s===2)return!1;t=d.global,d.type=d.type.toUpperCase(),d.hasContent=!bK.test(d.type),t&&f.active++===0&&f.event.trigger("ajaxStart");if(!d.hasContent){d.data&&(d.url+=(bM.test(d.url)?"&":"?")+d.data,delete d.data),k=d.url;if(d.cache===!1){var 
x=f.now(),y=d.url.replace(bQ,"$1_="+x);d.url=y+(y===d.url?(bM.test(d.url)?"&":"?")+"_="+x:"")}}(d.data&&d.hasContent&&d.contentType!==!1||c.contentType)&&v.setRequestHeader("Content-Type",d.contentType),d.ifModified&&(k=k||d.url,f.lastModified[k]&&v.setRequestHeader("If-Modified-Since",f.lastModified[k]),f.etag[k]&&v.setRequestHeader("If-None-Match",f.etag[k])),v.setRequestHeader("Accept",d.dataTypes[0]&&d.accepts[d.dataTypes[0]]?d.accepts[d.dataTypes[0]]+(d.dataTypes[0]!=="*"?", "+bX+"; q=0.01":""):d.accepts["*"]);for(u in d.headers)v.setRequestHeader(u,d.headers[u]);if(d.beforeSend&&(d.beforeSend.call(e,v,d)===!1||s===2)){v.abort();return!1}for(u in{success:1,error:1,complete:1})v[u](d[u]);p=b$(bU,d,c,v);if(!p)w(-1,"No Transport");else{v.readyState=1,t&&g.trigger("ajaxSend",[v,d]),d.async&&d.timeout>0&&(q=setTimeout(function(){v.abort("timeout")},d.timeout));try{s=1,p.send(l,w)}catch(z){if(s<2)w(-1,z);else throw z}}return v},param:function(a,c){var d=[],e=function(a,b){b=f.isFunction(b)?b():b,d[d.length]=encodeURIComponent(a)+"="+encodeURIComponent(b)};c===b&&(c=f.ajaxSettings.traditional);if(f.isArray(a)||a.jquery&&!f.isPlainObject(a))f.each(a,function(){e(this.name,this.value)});else for(var g in a)ca(g,a[g],c,e);return d.join("&").replace(bD,"+")}}),f.extend({active:0,lastModified:{},etag:{}});var cd=f.now(),ce=/(\=)\?(&|$)|\?\?/i;f.ajaxSetup({jsonp:"callback",jsonpCallback:function(){return f.expando+"_"+cd++}}),f.ajaxPrefilter("json jsonp",function(b,c,d){var e=b.contentType==="application/x-www-form-urlencoded"&&typeof b.data=="string";if(b.dataTypes[0]==="jsonp"||b.jsonp!==!1&&(ce.test(b.url)||e&&ce.test(b.data))){var 
g,h=b.jsonpCallback=f.isFunction(b.jsonpCallback)?b.jsonpCallback():b.jsonpCallback,i=a[h],j=b.url,k=b.data,l="$1"+h+"$2";b.jsonp!==!1&&(j=j.replace(ce,l),b.url===j&&(e&&(k=k.replace(ce,l)),b.data===k&&(j+=(/\?/.test(j)?"&":"?")+b.jsonp+"="+h))),b.url=j,b.data=k,a[h]=function(a){g=[a]},d.always(function(){a[h]=i,g&&f.isFunction(i)&&a[h](g[0])}),b.converters["script json"]=function(){g||f.error(h+" was not called");return g[0]},b.dataTypes[0]="json";return"script"}}),f.ajaxSetup({accepts:{script:"text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"},contents:{script:/javascript|ecmascript/},converters:{"text script":function(a){f.globalEval(a);return a}}}),f.ajaxPrefilter("script",function(a){a.cache===b&&(a.cache=!1),a.crossDomain&&(a.type="GET",a.global=!1)}),f.ajaxTransport("script",function(a){if(a.crossDomain){var d,e=c.head||c.getElementsByTagName("head")[0]||c.documentElement;return{send:function(f,g){d=c.createElement("script"),d.async="async",a.scriptCharset&&(d.charset=a.scriptCharset),d.src=a.url,d.onload=d.onreadystatechange=function(a,c){if(c||!d.readyState||/loaded|complete/.test(d.readyState))d.onload=d.onreadystatechange=null,e&&d.parentNode&&e.removeChild(d),d=b,c||g(200,"success")},e.insertBefore(d,e.firstChild)},abort:function(){d&&d.onload(0,1)}}}});var cf=a.ActiveXObject?function(){for(var a in ch)ch[a](0,1)}:!1,cg=0,ch;f.ajaxSettings.xhr=a.ActiveXObject?function(){return!this.isLocal&&ci()||cj()}:ci,function(a){f.extend(f.support,{ajax:!!a,cors:!!a&&"withCredentials"in a})}(f.ajaxSettings.xhr()),f.support.ajax&&f.ajaxTransport(function(c) +{if(!c.crossDomain||f.support.cors){var d;return{send:function(e,g){var h=c.xhr(),i,j;c.username?h.open(c.type,c.url,c.async,c.username,c.password):h.open(c.type,c.url,c.async);if(c.xhrFields)for(j in 
c.xhrFields)h[j]=c.xhrFields[j];c.mimeType&&h.overrideMimeType&&h.overrideMimeType(c.mimeType),!c.crossDomain&&!e["X-Requested-With"]&&(e["X-Requested-With"]="XMLHttpRequest");try{for(j in e)h.setRequestHeader(j,e[j])}catch(k){}h.send(c.hasContent&&c.data||null),d=function(a,e){var j,k,l,m,n;try{if(d&&(e||h.readyState===4)){d=b,i&&(h.onreadystatechange=f.noop,cf&&delete ch[i]);if(e)h.readyState!==4&&h.abort();else{j=h.status,l=h.getAllResponseHeaders(),m={},n=h.responseXML,n&&n.documentElement&&(m.xml=n),m.text=h.responseText;try{k=h.statusText}catch(o){k=""}!j&&c.isLocal&&!c.crossDomain?j=m.text?200:404:j===1223&&(j=204)}}}catch(p){e||g(-1,p)}m&&g(j,k,m,l)},!c.async||h.readyState===4?d():(i=++cg,cf&&(ch||(ch={},f(a).unload(cf)),ch[i]=d),h.onreadystatechange=d)},abort:function(){d&&d(0,1)}}}});var ck={},cl,cm,cn=/^(?:toggle|show|hide)$/,co=/^([+\-]=)?([\d+.\-]+)([a-z%]*)$/i,cp,cq=[["height","marginTop","marginBottom","paddingTop","paddingBottom"],["width","marginLeft","marginRight","paddingLeft","paddingRight"],["opacity"]],cr;f.fn.extend({show:function(a,b,c){var d,e;if(a||a===0)return this.animate(cu("show",3),a,b,c);for(var g=0,h=this.length;g=i.duration+this.startTime){this.now=this.end,this.pos=this.state=1,this.update(),i.animatedProperties[this.prop]=!0;for(b in i.animatedProperties)i.animatedProperties[b]!==!0&&(g=!1);if(g){i.overflow!=null&&!f.support.shrinkWrapBlocks&&f.each(["","X","Y"],function(a,b){h.style["overflow"+b]=i.overflow[a]}),i.hide&&f(h).hide();if(i.hide||i.show)for(b in i.animatedProperties)f.style(h,b,i.orig[b]),f.removeData(h,"fxshow"+b,!0),f.removeData(h,"toggle"+b,!0);d=i.complete,d&&(i.complete=!1,d.call(h))}return!1}i.duration==Infinity?this.now=e:(c=e-this.startTime,this.state=c/i.duration,this.pos=f.easing[i.animatedProperties[this.prop]](this.state,c,0,1,i.duration),this.now=this.start+(this.end-this.start)*this.pos),this.update();return!0}},f.extend(f.fx,{tick:function(){var 
a,b=f.timers,c=0;for(;c-1,k={},l={},m,n;j?(l=e.position(),m=l.top,n=l.left):(m=parseFloat(h)||0,n=parseFloat(i)||0),f.isFunction(b)&&(b=b.call(a,c,g)),b.top!=null&&(k.top=b.top-g.top+m),b.left!=null&&(k.left=b.left-g.left+n),"using"in b?b.using.call(a,k):e.css(k)}},f.fn.extend({position:function(){if(!this[0])return null;var a=this[0],b=this.offsetParent(),c=this.offset(),d=cx.test(b[0].nodeName)?{top:0,left:0}:b.offset();c.top-=parseFloat(f.css(a,"marginTop"))||0,c.left-=parseFloat(f.css(a,"marginLeft"))||0,d.top+=parseFloat(f.css(b[0],"borderTopWidth"))||0,d.left+=parseFloat(f.css(b[0],"borderLeftWidth"))||0;return{top:c.top-d.top,left:c.left-d.left}},offsetParent:function(){return this.map(function(){var a=this.offsetParent||c.body;while(a&&!cx.test(a.nodeName)&&f.css(a,"position")==="static")a=a.offsetParent;return a})}}),f.each(["Left","Top"],function(a,c){var d="scroll"+c;f.fn[d]=function(c){var e,g;if(c===b){e=this[0];if(!e)return null;g=cy(e);return g?"pageXOffset"in g?g[a?"pageYOffset":"pageXOffset"]:f.support.boxModel&&g.document.documentElement[d]||g.document.body[d]:e[d]}return this.each(function(){g=cy(this),g?g.scrollTo(a?f(g).scrollLeft():c,a?c:f(g).scrollTop()):this[d]=c})}}),f.each(["Height","Width"],function(a,c){var d=c.toLowerCase();f.fn["inner"+c]=function(){var a=this[0];return a?a.style?parseFloat(f.css(a,d,"padding")):this[d]():null},f.fn["outer"+c]=function(a){var b=this[0];return b?b.style?parseFloat(f.css(b,d,a?"margin":"border")):this[d]():null},f.fn[d]=function(a){var e=this[0];if(!e)return a==null?null:this;if(f.isFunction(a))return this.each(function(b){var c=f(this);c[d](a.call(this,b,c[d]()))});if(f.isWindow(e)){var g=e.document.documentElement["client"+c],h=e.document.body;return e.document.compatMode==="CSS1Compat"&&g||h&&h["client"+c]||g}if(e.nodeType===9)return Math.max(e.documentElement["client"+c],e.body["scroll"+c],e.documentElement["scroll"+c],e.body["offset"+c],e.documentElement["offset"+c]);if(a===b){var 
i=f.css(e,d),j=parseFloat(i);return f.isNumeric(j)?j:i}return this.css(d,typeof a=="string"?a:a+"px")}}),a.jQuery=a.$=f,typeof define=="function"&&define.amd&&define.amd.jQuery&&define("jquery",[],function(){return f})})(window); diff --git a/cub/docs/images/kernel_abstraction.png b/docs/html/kernel_abstraction.png similarity index 100% rename from cub/docs/images/kernel_abstraction.png rename to docs/html/kernel_abstraction.png diff --git a/cub/docs/images/kogge_stone_scan.png b/docs/html/kogge_stone_scan.png similarity index 100% rename from cub/docs/images/kogge_stone_scan.png rename to docs/html/kogge_stone_scan.png diff --git a/docs/html/modules.html b/docs/html/modules.html new file mode 100644 index 0000000000..f36812ffd6 --- /dev/null +++ b/docs/html/modules.html @@ -0,0 +1,112 @@ + + + + + + + +CUB: Modules + + + + + + + + + + + + +
+
+
+ + + + + +
+
CUB +
+
+ + + + + + + + + + +
+ +
+ +
+
+
Modules
+
+
+
Here is a list of all modules:
+
+ + + + + diff --git a/docs/html/namespacecub.html b/docs/html/namespacecub.html new file mode 100644 index 0000000000..af535a48ed --- /dev/null +++ b/docs/html/namespacecub.html @@ -0,0 +1,547 @@ + + + + + + + +CUB: cub Namespace Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ +
+
+ +
+
cub Namespace Reference
+
+
+ +

CUB namespace. +More...

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  ArrayTraits
 Array traits. More...
 
struct  BaseTraits
 Basic type traits. More...
 
class  BlockDiscontinuity
 BlockDiscontinuity provides operations for flagging discontinuities within a list of data items partitioned across a CUDA threadblock.

+
+discont_logo.png +
+.
+ More...
 
class  BlockExchange
 BlockExchange provides operations for reorganizing the partitioning of ordered data across a CUDA threadblock.

+
+transpose_logo.png +
+.
+ More...
 
class  BlockLoad
 BlockLoad provides data movement operations for reading block-arranged data from global memory.

+
+block_load_logo.png +
+.
+ More...
 
class  BlockRadixSort
 BlockRadixSort provides variants of parallel radix sorting across a CUDA threadblock.

+
+sorting_logo.png +
+.
+ More...
 
class  BlockReduce
 BlockReduce provides variants of parallel reduction across a CUDA threadblock.

+
+reduce_logo.png +
+.
+ More...
 
class  BlockScan
 BlockScan provides variants of parallel prefix scan (and prefix sum) across a CUDA threadblock.

+
+scan_logo.png +
+.
+ More...
 
class  BlockStore
 BlockStore provides data movement operations for writing blocked-arranged data to global memory.

+
+block_store_logo.png +
+.
+ More...
 
struct  EnableIf
 Simple enable-if (similar to Boost) More...
 
struct  Equality
 Default equality functor. More...
 
struct  Equals
 Type equality test. More...
 
struct  If
 Type selection (IF ? ThenType : ElseType) More...
 
struct  IsVolatile
 Volatile modifier test. More...
 
struct  Log2
 Statically determine log2(N), rounded up. More...
 
struct  Max
 Default max functor. More...
 
struct  NullType
 A simple "NULL" marker type. More...
 
struct  NumericTraits
 Numeric type traits. More...
 
struct  RemoveQualifiers
 Removes const and volatile qualifiers from type Tp. More...
 
struct  Sum
 Default sum functor. More...
 
struct  Traits
 Type traits. More...
 
class  WarpScan
 WarpScan provides variants of parallel prefix scan across a CUDA warp.

+
+warp_scan_logo.png +
+.
+ More...
 
+ + + + + + + + + + + + + + + + + + + +

+Enumerations

enum  BlockLoadPolicy { BLOCK_LOAD_DIRECT, +BLOCK_LOAD_VECTORIZE, +BLOCK_LOAD_TRANSPOSE + }
 Tuning policy for cub::BlockLoad. More...
 
enum  BlockScanPolicy { BLOCK_SCAN_RAKING, +BLOCK_SCAN_WARPSCANS + }
 Tuning policy for cub::BlockScan. More...
 
enum  BlockStorePolicy { BLOCK_STORE_DIRECT, +BLOCK_STORE_VECTORIZE, +BLOCK_STORE_TRANSPOSE + }
 Tuning policy for cub::BlockStore. More...
 
enum  Category { NOT_A_NUMBER, +SIGNED_INTEGER, +UNSIGNED_INTEGER, +FLOATING_POINT + }
 Basic type traits categories.
 
enum  PtxLoadModifier {
+  PTX_LOAD_NONE, +PTX_LOAD_CA, +PTX_LOAD_CG, +PTX_LOAD_CS, +
+  PTX_LOAD_CV, +PTX_LOAD_LDG, +PTX_LOAD_VS +
+ }
 Enumeration of PTX cache-modifiers for memory load operations. More...
 
enum  PtxStoreModifier {
+  PTX_STORE_NONE, +PTX_STORE_WB, +PTX_STORE_CG, +PTX_STORE_CS, +
+  PTX_STORE_WT, +PTX_STORE_VS +
+ }
 Enumeration of PTX cache-modifiers for memory store operations. More...
 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Functions

__host__ __device__
+__forceinline__ cudaError_t 
Debug (cudaError_t error, const char *message, const char *filename, int line)
 If CUB_STDERR is defined and error is not cudaSuccess, message is printed to stderr along with the supplied source context. More...
 
__host__ __device__
+__forceinline__ cudaError_t 
Debug (cudaError_t error, const char *filename, int line)
 If CUB_STDERR is defined and error is not cudaSuccess, the corresponding error message is printed to stderr along with the supplied source context. More...
 
Direct threadblock loads (blocked arrangement)
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void BlockLoadDirect (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void BlockLoadDirect (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier, guarded by range. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly, guarded by range. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier, guarded by range, with assignment for out-of-bound elements. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void BlockLoadDirect (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly, guarded by range, with assignment for out-of-bound elements. More...
 
Direct threadblock loads (striped arrangement)
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void BlockLoadDirectStriped (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator >
__device__ __forceinline__ void BlockLoadDirectStriped (InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly using the specified cache modifier, guarded by range. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly, guarded by range. More...
 
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly using the specified cache modifier, guarded by range, with assignment for out-of-bound elements. More...
 
template<typename T , int ITEMS_PER_THREAD, typename InputIterator , typename SizeT >
__device__ __forceinline__ void BlockLoadDirectStriped (InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Load striped tile directly, guarded by range, with assignment for out-of-bound elements. More...
 
Threadblock vectorized loads (blocked arrangement)
template<PtxLoadModifier MODIFIER, typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockLoadVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockLoadVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Load a tile of items across a threadblock directly. More...
 
Direct threadblock stores (blocked arrangement)
template<PtxStoreModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename OutputIterator >
__device__ __forceinline__ void BlockStoreDirect (OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIterator >
__device__ __forceinline__ void BlockStoreDirect (OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly. More...
 
template<PtxStoreModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename OutputIterator , typename SizeT >
__device__ __forceinline__ void BlockStoreDirect (OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly using the specified cache modifier, guarded by range. More...
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIterator , typename SizeT >
__device__ __forceinline__ void BlockStoreDirect (OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly, guarded by range. More...
 
Direct threadblock stores (striped arrangement)
template<PtxStoreModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename OutputIterator >
__device__ __forceinline__ void BlockStoreDirectStriped (OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Store striped tile directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIterator >
__device__ __forceinline__ void BlockStoreDirectStriped (OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Store striped tile directly. More...
 
template<PtxStoreModifier MODIFIER, typename T , int ITEMS_PER_THREAD, typename OutputIterator , typename SizeT >
__device__ __forceinline__ void BlockStoreDirectStriped (OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 
template<typename T , int ITEMS_PER_THREAD, typename OutputIterator , typename SizeT >
__device__ __forceinline__ void BlockStoreDirectStriped (OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)
 Store striped tile directly, guarded by range. More...
 
Threadblock vectorized stores (blocked arrangement)
template<PtxStoreModifier MODIFIER, typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockStoreVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly using the specified cache modifier. More...
 
template<typename T , int ITEMS_PER_THREAD>
__device__ __forceinline__ void BlockStoreVectorized (T *block_ptr, T(&items)[ITEMS_PER_THREAD])
 Store a tile of items across a threadblock directly. More...
 
Thread utilities for memory I/O using PTX cache modifiers
template<PtxLoadModifier MODIFIER, typename InputIterator >
__device__ __forceinline__
+std::iterator_traits
+< InputIterator >::value_type 
ThreadLoad (InputIterator itr)
 Thread utility for reading memory using cub::PtxLoadModifier cache modifiers. More...
 
template<PtxStoreModifier MODIFIER, typename OutputIterator , typename T >
__device__ __forceinline__ void ThreadStore (OutputIterator itr, const T &val)
 Thread utility for writing memory using cub::PtxStoreModifier cache modifiers. More...
 
+

Detailed Description

+

CUB namespace.

+

Enumeration Type Documentation

+ +
+
+ + + + +
enum cub::BlockScanPolicy
+
+ +

Tuning policy for cub::BlockScan.

+ + + +
Enumerator
BLOCK_SCAN_RAKING  +
Overview
An efficient "raking reduce-then-scan" prefix scan algorithm. Scan execution is comprised of five phases:
    +
  1. Upsweep sequential reduction in registers (if threads contribute more than one input each). Each thread then places the partial reduction of its item(s) into shared memory.
  2. +
  3. Upsweep sequential reduction in shared memory. Threads within a single warp rake across segments of shared partial reductions.
  4. +
  5. A warp-synchronous Kogge-Stone style exclusive scan within the raking warp.
  6. +
  7. Downsweep sequential exclusive scan in shared memory. Threads within a single warp rake across segments of shared partial reductions, seeded with the warp-scan output.
  8. +
  9. Downsweep sequential scan in registers (if threads contribute more than one input), seeded with the raking scan output.
  10. +
+
+
+block_scan_raking.png +
+
BLOCK_SCAN_RAKING data flow for a hypothetical 16-thread threadblock and 4-thread raking warp.
+
Performance Considerations
    +
  • Although this variant may suffer longer turnaround latencies when the GPU is under-occupied, it can often provide higher overall throughput across the GPU when suitably occupied.
  • +
+
+
BLOCK_SCAN_WARPSCANS  +
Overview
A quick "tiled warpscans" prefix scan algorithm. Scan execution is comprised of four phases:
    +
  1. Upsweep sequential reduction in registers (if threads contribute more than one input each). Each thread then places the partial reduction of its item(s) into shared memory.
  2. +
  3. Compute a shallow, but inefficient warp-synchronous Kogge-Stone style scan within each warp.
  4. +
  5. A propagation phase where the warp scan outputs in each warp are updated with the aggregate from each preceding warp.
  6. +
  7. Downsweep sequential scan in registers (if threads contribute more than one input), seeded with the raking scan output.
  8. +
+
+
+block_scan_warpscans.png +
+
BLOCK_SCAN_WARPSCANS data flow for a hypothetical 16-thread threadblock and 4-thread raking warp.
+
Performance Considerations
    +
  • Although this variant may suffer lower overall throughput across the GPU due to a heavy reliance on inefficient warpscans, it can often provide lower turnaround latencies when the GPU is under-occupied.
  • +
+
+
+ +
+
+ +
+
+ + + + +
enum cub::BlockLoadPolicy
+
+ +

Tuning policy for cub::BlockLoad.

+ + + + +
Enumerator
BLOCK_LOAD_DIRECT  +
Overview
+

A blocked arrangement of data is read directly from memory. The threadblock reads items in a parallel "raking" fashion: threadi reads the ith segment of consecutive elements.

+
Performance Considerations
    +
  • The utilization of memory transactions (coalescing) decreases as the access stride between threads increases (i.e., the number of items per thread).
  • +
+
+
BLOCK_LOAD_VECTORIZE  +
Overview
+

A blocked arrangement of data is read directly from memory using CUDA's built-in vectorized loads as a coalescing optimization. The threadblock reads items in a parallel "raking" fashion: threadi uses vector loads to read the ith segment of consecutive elements.

+

For example, ld.global.v4.s32 instructions will be generated when T = int and ITEMS_PER_THREAD > 4.

+
Performance Considerations
    +
  • The utilization of memory transactions (coalescing) remains high until the access stride between threads (i.e., the number of items per thread) exceeds the maximum vector load width (typically 4 items or 64B, whichever is lower).
  • +
  • The following conditions will prevent vectorization and loading will fall back to cub::BLOCK_LOAD_DIRECT:
      +
    • ITEMS_PER_THREAD is odd
    • +
    • The InputIterator is not a simple pointer type
    • +
    • The block input offset is not quadword-aligned
    • +
    • The data type T is not a built-in primitive or CUDA vector type (e.g., short, int2, double, float2, etc.)
    • +
    +
  • +
+
+
BLOCK_LOAD_TRANSPOSE  +
Overview
+

A striped arrangement of data is read directly from memory and then is locally transposed into a blocked arrangement. The threadblock reads items in a parallel "strip-mining" fashion: threadi reads items having stride BLOCK_THREADS between them. cub::BlockExchange is then used to locally reorder the items into a blocked arrangement.

+
Performance Considerations
    +
  • The utilization of memory transactions (coalescing) remains high regardless of items loaded per thread.
  • +
  • The local reordering incurs slightly longer latencies and throughput than the direct cub::BLOCK_LOAD_DIRECT and cub::BLOCK_LOAD_VECTORIZE alternatives.
  • +
+
+
+ +
+
+ +
+
+ + + + +
enum cub::BlockStorePolicy
+
+ +

Tuning policy for cub::BlockStore.

+ + + + +
Enumerator
BLOCK_STORE_DIRECT  +
Overview
+

A blocked arrangement of data is written directly to memory. The threadblock writes items in a parallel "raking" fashion: threadi writes the ith segment of consecutive elements.

+
Performance Considerations
    +
  • The utilization of memory transactions (coalescing) decreases as the access stride between threads increases (i.e., the number of items per thread).
  • +
+
+
BLOCK_STORE_VECTORIZE  +
Overview
+

A blocked arrangement of data is written directly to memory using CUDA's built-in vectorized stores as a coalescing optimization. The threadblock writes items in a parallel "raking" fashion: threadi uses vector stores to write the ith segment of consecutive elements.

+

For example, st.global.v4.s32 instructions will be generated when T = int and ITEMS_PER_THREAD > 4.

+
Performance Considerations
    +
  • The utilization of memory transactions (coalescing) remains high until the access stride between threads (i.e., the number of items per thread) exceeds the maximum vector load width (typically 4 items or 64B, whichever is lower).
  • +
  • The following conditions will prevent vectorization and loading will fall back to cub::BLOCK_STORE_DIRECT:
      +
    • ITEMS_PER_THREAD is odd
    • +
    • The OutputIterator is not a simple pointer type
    • +
    • The block output offset is not quadword-aligned
    • +
    • The data type T is not a built-in primitive or CUDA vector type (e.g., short, int2, double, float2, etc.)
    • +
    +
  • +
+
+
BLOCK_STORE_TRANSPOSE  +
Overview
A blocked arrangement is locally transposed into a striped arrangement which is then written to memory. More specifically, cub::BlockExchange is used to locally reorder the items into a striped arrangement, after which the threadblock writes items in a parallel "strip-mining" fashion: consecutive items owned by threadi are written to memory with stride BLOCK_THREADS between them.
+
Performance Considerations
    +
  • The utilization of memory transactions (coalescing) remains high regardless of items written per thread.
  • +
  • The local reordering incurs slightly longer latencies and throughput than the direct cub::BLOCK_STORE_DIRECT and cub::BLOCK_STORE_VECTORIZE alternatives.
  • +
+
+
+ +
+
+
+ + + + + diff --git a/docs/html/nav_f.png b/docs/html/nav_f.png new file mode 100644 index 0000000000..72a58a529e Binary files /dev/null and b/docs/html/nav_f.png differ diff --git a/docs/html/nav_g.png b/docs/html/nav_g.png new file mode 100644 index 0000000000..2093a237a9 Binary files /dev/null and b/docs/html/nav_g.png differ diff --git a/docs/html/nav_h.png b/docs/html/nav_h.png new file mode 100644 index 0000000000..33389b101d Binary files /dev/null and b/docs/html/nav_h.png differ diff --git a/docs/html/open.png b/docs/html/open.png new file mode 100644 index 0000000000..30f75c7efe Binary files /dev/null and b/docs/html/open.png differ diff --git a/docs/html/operators_8cuh.html b/docs/html/operators_8cuh.html new file mode 100644 index 0000000000..5f84578622 --- /dev/null +++ b/docs/html/operators_8cuh.html @@ -0,0 +1,134 @@ + + + + + + + +CUB: operators.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
operators.cuh File Reference
+
+
+
#include "type_utils.cuh"
+#include "ns_wrapper.cuh"
+
+ + + + + + + + + + +

+Classes

struct  cub::Equality< T >
 Default equality functor. More...
 
struct  cub::Max< T >
 Default max functor. More...
 
struct  cub::Sum< T >
 Default sum functor. More...
 
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+

Detailed Description

+

Simple binary operator functor types

+
+ + + + + diff --git a/cub/docs/images/raking.png b/docs/html/raking.png similarity index 100% rename from cub/docs/images/raking.png rename to docs/html/raking.png diff --git a/cub/docs/images/reduce_logo.png b/docs/html/reduce_logo.png similarity index 100% rename from cub/docs/images/reduce_logo.png rename to docs/html/reduce_logo.png diff --git a/cub/docs/images/scan_logo.png b/docs/html/scan_logo.png similarity index 100% rename from cub/docs/images/scan_logo.png rename to docs/html/scan_logo.png diff --git a/docs/html/search/all_61.html b/docs/html/search/all_61.html new file mode 100644 index 0000000000..f85089b559 --- /dev/null +++ b/docs/html/search/all_61.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_61.js b/docs/html/search/all_61.js new file mode 100644 index 0000000000..bb41a712e7 --- /dev/null +++ b/docs/html/search/all_61.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['arraytraits',['ArrayTraits',['../structcub_1_1_array_traits.html',1,'cub']]] +]; diff --git a/docs/html/search/all_62.html b/docs/html/search/all_62.html new file mode 100644 index 0000000000..f25fa2c88c --- /dev/null +++ b/docs/html/search/all_62.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_62.js b/docs/html/search/all_62.js new file mode 100644 index 0000000000..5685f0dcc3 --- /dev/null +++ b/docs/html/search/all_62.js @@ -0,0 +1,39 @@ +var searchData= +[ + ['basetraits',['BaseTraits',['../structcub_1_1_base_traits.html',1,'cub']]], + ['basetraits_3c_20not_5fa_5fnumber_2c_20false_2c_20false_2c_20removequalifiers_3c_20t_20_3e_3a_3atype_20_3e',['BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type >',['../structcub_1_1_base_traits.html',1,'cub']]], + ['basetraits_3c_20not_5fa_5fnumber_2c_20false_2c_20false_2c_20t_20_3e',['BaseTraits< NOT_A_NUMBER, false, false, T >',['../structcub_1_1_base_traits.html',1,'cub']]], + ['block_5fdiscontinuity_2ecuh',['block_discontinuity.cuh',['../block__discontinuity_8cuh.html',1,'']]], + ['block_5fexchange_2ecuh',['block_exchange.cuh',['../block__exchange_8cuh.html',1,'']]], + ['block_5fload_2ecuh',['block_load.cuh',['../block__load_8cuh.html',1,'']]], + ['block_5fload_5fdirect',['BLOCK_LOAD_DIRECT',['../namespacecub.html#a70f1d3c7536d858d49b896e937d25290a2d4d8900d7e697e9dac4062e97d3d835',1,'cub']]], + ['block_5fload_5ftranspose',['BLOCK_LOAD_TRANSPOSE',['../namespacecub.html#a70f1d3c7536d858d49b896e937d25290acd94f285472e8f7c883a7407f6f4efc4',1,'cub']]], + ['block_5fload_5fvectorize',['BLOCK_LOAD_VECTORIZE',['../namespacecub.html#a70f1d3c7536d858d49b896e937d25290a826be9d4df1c44c0e5c00a9c9c136965',1,'cub']]], + ['block_5fradix_5fsort_2ecuh',['block_radix_sort.cuh',['../block__radix__sort_8cuh.html',1,'']]], + ['block_5freduce_2ecuh',['block_reduce.cuh',['../block__reduce_8cuh.html',1,'']]], + ['block_5fscan_2ecuh',['block_scan.cuh',['../block__scan_8cuh.html',1,'']]], + ['block_5fscan_5fraking',['BLOCK_SCAN_RAKING',['../namespacecub.html#aa7484021273cbfd89229a6b5c205b9f1a0fa6cac57b7df2f475a67af053b9371c',1,'cub']]], + 
['block_5fscan_5fwarpscans',['BLOCK_SCAN_WARPSCANS',['../namespacecub.html#aa7484021273cbfd89229a6b5c205b9f1a08bbb9b8f17a4b9e568c1333aeda6324',1,'cub']]], + ['block_5fstore_2ecuh',['block_store.cuh',['../block__store_8cuh.html',1,'']]], + ['block_5fstore_5fdirect',['BLOCK_STORE_DIRECT',['../namespacecub.html#aaaa9ee8c8a57c6607909c110affd189ea9b8dcc7b6b06bcfc24af4f499523b880',1,'cub']]], + ['block_5fstore_5ftranspose',['BLOCK_STORE_TRANSPOSE',['../namespacecub.html#aaaa9ee8c8a57c6607909c110affd189eab0bbe20613466c3cedfcfea33a97d69c',1,'cub']]], + ['block_5fstore_5fvectorize',['BLOCK_STORE_VECTORIZE',['../namespacecub.html#aaaa9ee8c8a57c6607909c110affd189ea0ccd625a7f2f3649155cbd5a27adfb41',1,'cub']]], + ['blockdiscontinuity',['BlockDiscontinuity',['../classcub_1_1_block_discontinuity.html',1,'cub']]], + ['blockedtostriped',['BlockedToStriped',['../classcub_1_1_block_exchange.html#a068f68d3f9d5c53920eeae82594d6935',1,'cub::BlockExchange']]], + ['blockexchange',['BlockExchange',['../classcub_1_1_block_exchange.html',1,'cub']]], + ['blockload',['BlockLoad',['../classcub_1_1_block_load.html',1,'cub']]], + ['blockloaddirect',['BlockLoadDirect',['../group___simt_utils.html#ga2ece00cc00c1d3269ee79ddf60d15457',1,'cub::BlockLoadDirect(InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga51495fa39938ecf57056d4ca6f0260de',1,'cub::BlockLoadDirect(InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga01e0a2d42d5b20aab660815c5cf258a0',1,'cub::BlockLoadDirect(InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#gaac537b6a8c9caaae1e6e77e9717e9541',1,'cub::BlockLoadDirect(InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#gae910789e82acd344d6f5a4cc50beef03',1,'cub::BlockLoadDirect(InputIterator block_itr, const SizeT &guarded_items, T oob_default, 
T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#gac20fbd7aaa120e661575fe6e8028a015',1,'cub::BlockLoadDirect(InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD])']]], + ['blockloaddirectstriped',['BlockLoadDirectStriped',['../group___simt_utils.html#ga10442f4a83e49fb4a414ce6ce9234b79',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#ga74f3768367f80c79037b3e77c13bf4bc',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#ga405e4ed36717a6d2c0584578ab94923a',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#ga7ba15be704f5aa7c7db809a66af43160',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#gabf20f04ee43adc4661429a7902f71911',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#gaf826ded39a7e107a5f15416d4b147be0',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)']]], + ['blockloadpolicy',['BlockLoadPolicy',['../namespacecub.html#a70f1d3c7536d858d49b896e937d25290',1,'cub']]], + ['blockloadvectorized',['BlockLoadVectorized',['../group___simt_utils.html#gaea8200ef976bb588c569e039ea79005c',1,'cub::BlockLoadVectorized(T *block_ptr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#gab1a8ffc7fe70a636a3d09403344cfced',1,'cub::BlockLoadVectorized(T *block_ptr, T(&items)[ITEMS_PER_THREAD])']]], + ['blockradixsort',['BlockRadixSort',['../classcub_1_1_block_radix_sort.html',1,'cub']]], 
+ ['blockreduce',['BlockReduce',['../classcub_1_1_block_reduce.html',1,'cub']]], + ['blockscan',['BlockScan',['../classcub_1_1_block_scan.html',1,'cub']]], + ['blockscanpolicy',['BlockScanPolicy',['../namespacecub.html#aa7484021273cbfd89229a6b5c205b9f1',1,'cub']]], + ['blockstore',['BlockStore',['../classcub_1_1_block_store.html',1,'cub']]], + ['blockstoredirect',['BlockStoreDirect',['../group___simt_utils.html#gaa8f12f02c082f8d689100b8ac88f8f61',1,'cub::BlockStoreDirect(OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga2d52e8ce92c8bc044898cc289a7e96b4',1,'cub::BlockStoreDirect(OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga8b5f82ad8487072b6cc80b312db1962d',1,'cub::BlockStoreDirect(OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga34a623c83894408f4f05ceb788d5ac92',1,'cub::BlockStoreDirect(OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])']]], + ['blockstoredirectstriped',['BlockStoreDirectStriped',['../group___simt_utils.html#gaa18341f23a5d00c1b148e0013a9cc637',1,'cub::BlockStoreDirectStriped(OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#gaed26402e843c84978ce85da24819ebeb',1,'cub::BlockStoreDirectStriped(OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#gadcef89bcc6b3c66e1fa1267c15b08a78',1,'cub::BlockStoreDirectStriped(OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#ga75150e5519f86c1054d7a7584e1a4f23',1,'cub::BlockStoreDirectStriped(OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)']]], + ['blockstorepolicy',['BlockStorePolicy',['../namespacecub.html#aaaa9ee8c8a57c6607909c110affd189e',1,'cub']]], + 
['blockstorevectorized',['BlockStoreVectorized',['../group___simt_utils.html#ga013c3ab8214854f45e8d678958e7dde9',1,'cub::BlockStoreVectorized(T *block_ptr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga5db0cef20c11ea62aef484c587c4e064',1,'cub::BlockStoreVectorized(T *block_ptr, T(&items)[ITEMS_PER_THREAD])']]], + ['bibliographic_20references',['Bibliographic References',['../citelist.html',1,'']]] +]; diff --git a/docs/html/search/all_63.html b/docs/html/search/all_63.html new file mode 100644 index 0000000000..e7f34db586 --- /dev/null +++ b/docs/html/search/all_63.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_63.js b/docs/html/search/all_63.js new file mode 100644 index 0000000000..363e157875 --- /dev/null +++ b/docs/html/search/all_63.js @@ -0,0 +1,9 @@ +var searchData= +[ + ['category',['CATEGORY',['../structcub_1_1_base_traits.html#a25ff6477c84dc3bd5f4b5e70cd600f09',1,'cub::BaseTraits::CATEGORY()'],['../namespacecub.html#a4733b6d40e923244502e6f5b200766ef',1,'cub::Category()']]], + ['cub',['cub',['../namespacecub.html',1,'']]], + ['cub_5fhas_5fnested_5ftype',['CUB_HAS_NESTED_TYPE',['../type__utils_8cuh.html#ad785ef798316018015561fda4feec8af',1,'type_utils.cuh']]], + ['cubdebug',['CubDebug',['../debug_8cuh.html#a04236bb0db0efe7a19c9ecba0aedc1e5',1,'debug.cuh']]], + ['cubdebugexit',['CubDebugExit',['../debug_8cuh.html#a2e5de1db78fd84552bda8254efa409a3',1,'debug.cuh']]], + ['cooperative_20simt_20operations',['Cooperative SIMT Operations',['../group___simt_coop.html',1,'']]] +]; diff --git a/docs/html/search/all_64.html b/docs/html/search/all_64.html new file mode 100644 index 0000000000..360601fa72 --- /dev/null +++ b/docs/html/search/all_64.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_64.js b/docs/html/search/all_64.js new file mode 100644 index 0000000000..163707c8d7 --- /dev/null +++ b/docs/html/search/all_64.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['debug',['Debug',['../group___host_util.html#ga0ac6d9c9d88ac0da0d644c88a3b36aa3',1,'cub::Debug(cudaError_t error, const char *message, const char *filename, int line)'],['../group___host_util.html#ga5a175d2a88f63f7f1ab30e8b4f2cfa95',1,'cub::Debug(cudaError_t error, const char *filename, int line)']]], + ['debug_2ecuh',['debug.cuh',['../debug_8cuh.html',1,'']]] +]; diff --git a/docs/html/search/all_65.html b/docs/html/search/all_65.html new file mode 100644 index 0000000000..c2f4fcd94f --- /dev/null +++ b/docs/html/search/all_65.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_65.js b/docs/html/search/all_65.js new file mode 100644 index 0000000000..3edb0d15a3 --- /dev/null +++ b/docs/html/search/all_65.js @@ -0,0 +1,8 @@ +var searchData= +[ + ['enableif',['EnableIf',['../structcub_1_1_enable_if.html',1,'cub']]], + ['equality',['Equality',['../structcub_1_1_equality.html',1,'cub']]], + ['equals',['Equals',['../structcub_1_1_equals.html',1,'cub']]], + ['exclusivescan',['ExclusiveScan',['../classcub_1_1_warp_scan.html#ab034c0bd94f866b7044d085f0d354e2d',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op)'],['../classcub_1_1_warp_scan.html#a7ad7b67ebb45eae6d120e55206dace8e',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op, T &warp_aggregate)'],['../classcub_1_1_warp_scan.html#af0e55650ffbbb6ad5245c11110fc9343',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)'],['../classcub_1_1_warp_scan.html#ae84a95431640ff2d450c4b0a98dd826e',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op)'],['../classcub_1_1_warp_scan.html#acb0ad5c2aaa0866aa7bcc9a597098daa',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate)'],['../classcub_1_1_warp_scan.html#a182cf61f1437c0ac0e3567a9737fcbfe',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)'],['../classcub_1_1_block_scan.html#acc948eb8877a6d9956daebf258119b7a',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op, T &block_aggregate)'],['../classcub_1_1_block_scan.html#a64fbe22df260c4731536e1bbcec70cf6',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], 
T(&output)[ITEMS_PER_THREAD], const T &identity, ScanOp scan_op, T &block_aggregate)'],['../classcub_1_1_block_scan.html#aa858e1cc0cee3e54fc3fb00bc0ecb3ca',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, T identity, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a2a7bf6b9e06e0ed3a71931f1694359f5',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T identity, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a86857a9daede055f69299caff5b16259',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, T identity, ScanOp scan_op)'],['../classcub_1_1_block_scan.html#a2cdb196b18b1d0eb3f7f85a57ed3ac7e',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], const T &identity, ScanOp scan_op)'],['../classcub_1_1_block_scan.html#a1a0090740c3b47eb018831f36d4fe307',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate)'],['../classcub_1_1_block_scan.html#a929f90d956502a7142fa780647241bf0',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate)'],['../classcub_1_1_block_scan.html#aaea795b16f8a66dbbef62952b5f73643',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#afc79e233524e1e357a4cb77c44a46957',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a05c65595bc59cf1bb0fd04965f3b0988',1,'cub::BlockScan::ExclusiveScan(SmemStorage 
&smem_storage, T input, T &output, ScanOp scan_op)'],['../classcub_1_1_block_scan.html#ac8d2690770ba251c6da988936f248da5',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op)']]], + ['exclusivesum',['ExclusiveSum',['../classcub_1_1_warp_scan.html#a2695420235a1ace8817a595a6f930d61',1,'cub::WarpScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output)'],['../classcub_1_1_warp_scan.html#a2a7c0b9abd940adf1b76e1d5931fcfd7',1,'cub::WarpScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate)'],['../classcub_1_1_warp_scan.html#a8b9720f46d2b9cb920c4eb8a6543fc2c',1,'cub::WarpScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)'],['../classcub_1_1_block_scan.html#a01676b552903e7b5d240bbde7968d55e',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate)'],['../classcub_1_1_block_scan.html#aa4b6abbc17343b897a7b93d581620164',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate)'],['../classcub_1_1_block_scan.html#af0fb65e2f9663daaee32390dee4c786b',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a9adb14da21b88da067e0dae60c628183',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a8e661f683b84c496a0f1bcd96d5bb528',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output)'],['../classcub_1_1_block_scan.html#a1414392abb5dc2f60386130ad8ad5130',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD])']]] +]; diff --git 
a/docs/html/search/all_66.html b/docs/html/search/all_66.html new file mode 100644 index 0000000000..a9ac881c03 --- /dev/null +++ b/docs/html/search/all_66.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_66.js b/docs/html/search/all_66.js new file mode 100644 index 0000000000..a09c3361bc --- /dev/null +++ b/docs/html/search/all_66.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['flag',['Flag',['../classcub_1_1_block_discontinuity.html#ab6390151f109ac253810504ddc5a7c04',1,'cub::BlockDiscontinuity::Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD], T &last_tile_item)'],['../classcub_1_1_block_discontinuity.html#a3bdf3b7ad8ace5249f84e103f25ff3bb',1,'cub::BlockDiscontinuity::Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_discontinuity.html#a7fa4c2dc8bbe5db5da50fedca0613b46',1,'cub::BlockDiscontinuity::Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T tile_predecessor, FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD], T &last_tile_item)'],['../classcub_1_1_block_discontinuity.html#a351ed32eaada93c944fbb29feda5a6cd',1,'cub::BlockDiscontinuity::Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T tile_predecessor, FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD])']]] +]; diff --git a/docs/html/search/all_68.html b/docs/html/search/all_68.html new file mode 100644 index 0000000000..dec41d62ef --- /dev/null +++ b/docs/html/search/all_68.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_68.js b/docs/html/search/all_68.js new file mode 100644 index 0000000000..8fcb135372 --- /dev/null +++ b/docs/html/search/all_68.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['host_20utilities',['Host Utilities',['../group___host_util.html',1,'']]] +]; diff --git a/docs/html/search/all_69.html b/docs/html/search/all_69.html new file mode 100644 index 0000000000..192e4bab2a --- /dev/null +++ b/docs/html/search/all_69.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_69.js b/docs/html/search/all_69.js new file mode 100644 index 0000000000..08639b4672 --- /dev/null +++ b/docs/html/search/all_69.js @@ -0,0 +1,7 @@ +var searchData= +[ + ['if',['If',['../structcub_1_1_if.html',1,'cub']]], + ['inclusivescan',['InclusiveScan',['../classcub_1_1_warp_scan.html#a9f0397ded5ce89a8750dc8fe10078f3e',1,'cub::WarpScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op)'],['../classcub_1_1_warp_scan.html#a4df11b322777066e9237fc2ef3d257e5',1,'cub::WarpScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate)'],['../classcub_1_1_warp_scan.html#ae5e4f735a2bda14ad6a94a68a0528bd1',1,'cub::WarpScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)'],['../classcub_1_1_block_scan.html#a981f9bae42f6f9c5fe6950698b97d8d4',1,'cub::BlockScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate)'],['../classcub_1_1_block_scan.html#a0b750ea27539a71e46657f3d63fdbce6',1,'cub::BlockScan::InclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate)'],['../classcub_1_1_block_scan.html#a4b987cb649f4aced568b77bd9ac18db6',1,'cub::BlockScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#aae6c7a0cdb8ea21cd7eac0cecacd1ac1',1,'cub::BlockScan::InclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#ac5220b7189e39eb4ff67430f732b1f96',1,'cub::BlockScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp 
scan_op)'],['../classcub_1_1_block_scan.html#a046dfe9d6daa55a0d9c74d6ce2f7aa5b',1,'cub::BlockScan::InclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op)']]], + ['inclusivesum',['InclusiveSum',['../classcub_1_1_warp_scan.html#adec85c76d951c326e592e364aa63c728',1,'cub::WarpScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output)'],['../classcub_1_1_warp_scan.html#a78bf6035a0bccc58913dc0ec570c487d',1,'cub::WarpScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate)'],['../classcub_1_1_warp_scan.html#a032ce184b653241719effbd0b5b2dbcd',1,'cub::WarpScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)'],['../classcub_1_1_block_scan.html#a738fa570c0a0e391397c342eaab388cb',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate)'],['../classcub_1_1_block_scan.html#a88a16a5a98fadb09fa216b2d234f0b86',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate)'],['../classcub_1_1_block_scan.html#aed86bb94fe1908673dadbbaec0f95362',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a47978bae019da4e99c30519de96534a4',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#af4bfc827149cbcfd741e578cfaeee5c7',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output)'],['../classcub_1_1_block_scan.html#ae9562dc6cb1e745c8714668dcef3e5b1',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD])']]], + 
['isvolatile',['IsVolatile',['../structcub_1_1_is_volatile.html',1,'cub']]] +]; diff --git a/docs/html/search/all_6c.html b/docs/html/search/all_6c.html new file mode 100644 index 0000000000..ae8bc48da3 --- /dev/null +++ b/docs/html/search/all_6c.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_6c.js b/docs/html/search/all_6c.js new file mode 100644 index 0000000000..cbd18f7293 --- /dev/null +++ b/docs/html/search/all_6c.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['load',['Load',['../classcub_1_1_block_load.html#ac671e9f033037fc01384a9296684200c',1,'cub::BlockLoad::Load(SmemStorage &smem_storage, InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_load.html#ae7025d183926de0430146d41b5771032',1,'cub::BlockLoad::Load(SmemStorage &smem_storage, InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])']]], + ['log2',['Log2',['../structcub_1_1_log2.html',1,'cub']]] +]; diff --git a/docs/html/search/all_6d.html b/docs/html/search/all_6d.html new file mode 100644 index 0000000000..ee90718ff2 --- /dev/null +++ b/docs/html/search/all_6d.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_6d.js b/docs/html/search/all_6d.js new file mode 100644 index 0000000000..3432b9e7a8 --- /dev/null +++ b/docs/html/search/all_6d.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['max',['Max',['../structcub_1_1_max.html',1,'cub']]] +]; diff --git a/docs/html/search/all_6e.html b/docs/html/search/all_6e.html new file mode 100644 index 0000000000..e0fd7653a0 --- /dev/null +++ b/docs/html/search/all_6e.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_6e.js b/docs/html/search/all_6e.js new file mode 100644 index 0000000000..8c05a0bd37 --- /dev/null +++ b/docs/html/search/all_6e.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['nulltype',['NullType',['../structcub_1_1_null_type.html',1,'cub']]], + ['numerictraits',['NumericTraits',['../structcub_1_1_numeric_traits.html',1,'cub']]], + ['numerictraits_3c_20removequalifiers_3c_20t_20_3e_3a_3atype_20_3e',['NumericTraits< RemoveQualifiers< T >::Type >',['../structcub_1_1_numeric_traits.html',1,'cub']]] +]; diff --git a/docs/html/search/all_6f.html b/docs/html/search/all_6f.html new file mode 100644 index 0000000000..5e86b030d2 --- /dev/null +++ b/docs/html/search/all_6f.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_6f.js b/docs/html/search/all_6f.js new file mode 100644 index 0000000000..5f733c3055 --- /dev/null +++ b/docs/html/search/all_6f.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['operator_28_29',['operator()',['../structcub_1_1_equality.html#a89f91d9fabb6b8237f97307ce04f1bab',1,'cub::Equality::operator()()'],['../structcub_1_1_sum.html#a05a1ac22d3e5c852dec8c39724297fe3',1,'cub::Sum::operator()()'],['../structcub_1_1_max.html#a880bd2cf50b320c1771eafe31ebf9ea1',1,'cub::Max::operator()()']]], + ['operators_2ecuh',['operators.cuh',['../operators_8cuh.html',1,'']]] +]; diff --git a/docs/html/search/all_70.html b/docs/html/search/all_70.html new file mode 100644 index 0000000000..799c1a277d --- /dev/null +++ b/docs/html/search/all_70.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_70.js b/docs/html/search/all_70.js new file mode 100644 index 0000000000..dcfc9123ce --- /dev/null +++ b/docs/html/search/all_70.js @@ -0,0 +1,18 @@ +var searchData= +[ + ['ptx_5fload_5fca',['PTX_LOAD_CA',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55dad802bce71c7380a911ab0cee5b366fd3',1,'cub']]], + ['ptx_5fload_5fcg',['PTX_LOAD_CG',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55da0e18a5a910be460d738772631eafadd0',1,'cub']]], + ['ptx_5fload_5fcs',['PTX_LOAD_CS',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55da0b263e2237593103d5e9004e935c66af',1,'cub']]], + ['ptx_5fload_5fcv',['PTX_LOAD_CV',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55da05ee1b160fa298ef4b2578a9df1c1350',1,'cub']]], + ['ptx_5fload_5fldg',['PTX_LOAD_LDG',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55dae8ca2d6545712389c0578224f214913d',1,'cub']]], + ['ptx_5fload_5fnone',['PTX_LOAD_NONE',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55da017db24b99abd332be14151d35fa3cf5',1,'cub']]], + ['ptx_5fload_5fvs',['PTX_LOAD_VS',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55dae4cbe986a2413b418ec83e8bb153b990',1,'cub']]], + ['ptx_5fstore_5fcg',['PTX_STORE_CG',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2a95a2bc222f2adce9dd2d0251f53e1d91',1,'cub']]], + ['ptx_5fstore_5fcs',['PTX_STORE_CS',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2ac08bd33e1c4694ccdb899dd9bdef9c96',1,'cub']]], + ['ptx_5fstore_5fnone',['PTX_STORE_NONE',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2a5437dabe5d300b7188dbb42132363c05',1,'cub']]], + ['ptx_5fstore_5fvs',['PTX_STORE_VS',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2adee47f52a9358d88446393c5affd11aa',1,'cub']]], + ['ptx_5fstore_5fwb',['PTX_STORE_WB',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2a2d57d44c3dbebbae63abcc3ccb80a412',1,'cub']]], + 
['ptx_5fstore_5fwt',['PTX_STORE_WT',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2a8d07fc5099d72afdc46b817d566d3df8',1,'cub']]], + ['ptxloadmodifier',['PtxLoadModifier',['../group___simt_utils.html#ga023420f30fec7d4b187fc98f4fd2a55d',1,'cub']]], + ['ptxstoremodifier',['PtxStoreModifier',['../group___simt_utils.html#gae9c7d6a6af7104f528509182ac9c9da2',1,'cub']]] +]; diff --git a/docs/html/search/all_72.html b/docs/html/search/all_72.html new file mode 100644 index 0000000000..347b9f6660 --- /dev/null +++ b/docs/html/search/all_72.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_72.js b/docs/html/search/all_72.js new file mode 100644 index 0000000000..e3c6279e2b --- /dev/null +++ b/docs/html/search/all_72.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['reduce',['Reduce',['../classcub_1_1_block_reduce.html#aee16d11eea520e487f387b7d9f2755d2',1,'cub::BlockReduce::Reduce(SmemStorage &smem_storage, T input, ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#acc9a5597731b4985fac1e8a90153d979',1,'cub::BlockReduce::Reduce(SmemStorage &smem_storage, T(&inputs)[ITEMS_PER_THREAD], ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#aeddda91425d07c74b819d34ac5b7a0a6',1,'cub::BlockReduce::Reduce(SmemStorage &smem_storage, T input, ReductionOp reduction_op, const unsigned int &valid_threads)']]], + ['removequalifiers',['RemoveQualifiers',['../structcub_1_1_remove_qualifiers.html',1,'cub']]] +]; diff --git a/docs/html/search/all_73.html b/docs/html/search/all_73.html new file mode 100644 index 0000000000..9abac91a94 --- /dev/null +++ b/docs/html/search/all_73.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_73.js b/docs/html/search/all_73.js new file mode 100644 index 0000000000..a6f19fd8bf --- /dev/null +++ b/docs/html/search/all_73.js @@ -0,0 +1,15 @@ +var searchData= +[ + ['scattertoblocked',['ScatterToBlocked',['../classcub_1_1_block_exchange.html#a7a429434bbdc3e949a8291461a74bff9',1,'cub::BlockExchange']]], + ['scattertostriped',['ScatterToStriped',['../classcub_1_1_block_exchange.html#a8b8997367d0d42ee7eb600b981f7b72d',1,'cub::BlockExchange']]], + ['simt_20primitives',['SIMT Primitives',['../group___simt.html',1,'']]], + ['simt_20utilities',['SIMT Utilities',['../group___simt_utils.html',1,'']]], + ['smemstorage',['SmemStorage',['../classcub_1_1_warp_scan.html#a2bfa864e963cb4965139ac1b6c66d1b7',1,'cub::WarpScan::SmemStorage()'],['../classcub_1_1_block_reduce.html#aea61e2e067e0e2d3fba2b0c8e0f73d8d',1,'cub::BlockReduce::SmemStorage()'],['../classcub_1_1_block_scan.html#abda6008896e2e17b50c7deb0ab320e64',1,'cub::BlockScan::SmemStorage()'],['../classcub_1_1_block_radix_sort.html#a495e63ab526ce35e6dfce9fb5206746c',1,'cub::BlockRadixSort::SmemStorage()'],['../classcub_1_1_block_load.html#a09296fd690f1452df9cae24a037e906a',1,'cub::BlockLoad::SmemStorage()'],['../classcub_1_1_block_store.html#aa80c1691bc7aa80bc38c2797b3a99c24',1,'cub::BlockStore::SmemStorage()'],['../classcub_1_1_block_exchange.html#ad91573946e4abe5ae5e34277ded1c215',1,'cub::BlockExchange::SmemStorage()'],['../classcub_1_1_block_discontinuity.html#a855c92f9c3869909913860fa11e755a4',1,'cub::BlockDiscontinuity::SmemStorage()']]], + ['sortblocked',['SortBlocked',['../classcub_1_1_block_radix_sort.html#abdbfda59c129946222ab10d2e2e6f6f5',1,'cub::BlockRadixSort::SortBlocked(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)'],['../classcub_1_1_block_radix_sort.html#a7e304558942536fc1636849f8d93d896',1,'cub::BlockRadixSort::SortBlocked(SmemStorage &smem_storage, 
KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)']]], + ['sortblockedtostriped',['SortBlockedToStriped',['../classcub_1_1_block_radix_sort.html#ac7fe497d674f5da3062a3d34f010f438',1,'cub::BlockRadixSort::SortBlockedToStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)'],['../classcub_1_1_block_radix_sort.html#ab1a7c9c9e536f13741b4c4f6c369ce80',1,'cub::BlockRadixSort::SortBlockedToStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)']]], + ['sortstriped',['SortStriped',['../classcub_1_1_block_radix_sort.html#a81e081320239182670da329c2b036166',1,'cub::BlockRadixSort::SortStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)'],['../classcub_1_1_block_radix_sort.html#a0c730daecd6cc6f69135ae36d11d5c53',1,'cub::BlockRadixSort::SortStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)']]], + ['store',['Store',['../classcub_1_1_block_store.html#a1fc63a32e80b1275145a469e719f8530',1,'cub::BlockStore::Store(SmemStorage &smem_storage, OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_store.html#a3c1f7f37767338869f44e410a90f2255',1,'cub::BlockStore::Store(SmemStorage &smem_storage, OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])']]], + ['stripedtoblocked',['StripedToBlocked',['../classcub_1_1_block_exchange.html#ad8000bf73c3ce935018f32451985ae37',1,'cub::BlockExchange']]], + ['sum',['Sum',['../classcub_1_1_block_reduce.html#a136060887c434257984b8bf3f5c62323',1,'cub::BlockReduce::Sum(SmemStorage 
&smem_storage, T input)'],['../classcub_1_1_block_reduce.html#abe33a10ae2e316943e95d23f1d4d702a',1,'cub::BlockReduce::Sum(SmemStorage &smem_storage, T(&inputs)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_reduce.html#aad43736f8ea38a2c9052059b1d80c7fc',1,'cub::BlockReduce::Sum(SmemStorage &smem_storage, T input, const unsigned int &valid_threads)']]], + ['sum',['Sum',['../structcub_1_1_sum.html',1,'cub']]] +]; diff --git a/docs/html/search/all_74.html b/docs/html/search/all_74.html new file mode 100644 index 0000000000..c646aeffcd --- /dev/null +++ b/docs/html/search/all_74.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_74.js b/docs/html/search/all_74.js new file mode 100644 index 0000000000..df33641f84 --- /dev/null +++ b/docs/html/search/all_74.js @@ -0,0 +1,10 @@ +var searchData= +[ + ['thread_5fload_2ecuh',['thread_load.cuh',['../thread__load_8cuh.html',1,'']]], + ['thread_5fstore_2ecuh',['thread_store.cuh',['../thread__store_8cuh.html',1,'']]], + ['threadload',['ThreadLoad',['../group___simt_utils.html#ga1e390b9fee4c8012a021d49d9b76b1e8',1,'cub']]], + ['threadstore',['ThreadStore',['../group___simt_utils.html#gad117ecb99b9230a032971b0ac08ca6dc',1,'cub']]], + ['traits',['Traits',['../structcub_1_1_traits.html',1,'cub']]], + ['type',['Type',['../structcub_1_1_if.html#af689e9527f56372e66413b65581ded8e',1,'cub::If::Type()'],['../structcub_1_1_remove_qualifiers.html#a9143e196ef5e6a0176b953f677e94671',1,'cub::RemoveQualifiers::Type()'],['../structcub_1_1_enable_if.html#aafd9405b5887d2a6d3553eee0202798a',1,'cub::EnableIf::Type()']]], + ['type_5futils_2ecuh',['type_utils.cuh',['../type__utils_8cuh.html',1,'']]] +]; diff --git a/docs/html/search/all_76.html b/docs/html/search/all_76.html new file mode 100644 index 0000000000..50b86daa03 --- /dev/null +++ b/docs/html/search/all_76.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_76.js b/docs/html/search/all_76.js new file mode 100644 index 0000000000..58c39304d9 --- /dev/null +++ b/docs/html/search/all_76.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['value',['VALUE',['../structcub_1_1_log2.html#ad1923657cb57427a8621f53022590cd2',1,'cub::Log2']]] +]; diff --git a/docs/html/search/all_77.html b/docs/html/search/all_77.html new file mode 100644 index 0000000000..55d7142924 --- /dev/null +++ b/docs/html/search/all_77.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/all_77.js b/docs/html/search/all_77.js new file mode 100644 index 0000000000..d61ee34071 --- /dev/null +++ b/docs/html/search/all_77.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['warp_5fscan_2ecuh',['warp_scan.cuh',['../warp__scan_8cuh.html',1,'']]], + ['warpscan',['WarpScan',['../classcub_1_1_warp_scan.html',1,'cub']]] +]; diff --git a/docs/html/search/classes_61.html b/docs/html/search/classes_61.html new file mode 100644 index 0000000000..a4c07d590c --- /dev/null +++ b/docs/html/search/classes_61.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/classes_61.js b/docs/html/search/classes_61.js new file mode 100644 index 0000000000..bb41a712e7 --- /dev/null +++ b/docs/html/search/classes_61.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['arraytraits',['ArrayTraits',['../structcub_1_1_array_traits.html',1,'cub']]] +]; diff --git a/docs/html/search/classes_62.html b/docs/html/search/classes_62.html new file mode 100644 index 0000000000..04a59d2efe --- /dev/null +++ b/docs/html/search/classes_62.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/classes_62.js b/docs/html/search/classes_62.js new file mode 100644 index 0000000000..a70ac445b6 --- /dev/null +++ b/docs/html/search/classes_62.js @@ -0,0 +1,13 @@ +var searchData= +[ + ['basetraits',['BaseTraits',['../structcub_1_1_base_traits.html',1,'cub']]], + ['basetraits_3c_20not_5fa_5fnumber_2c_20false_2c_20false_2c_20removequalifiers_3c_20t_20_3e_3a_3atype_20_3e',['BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type >',['../structcub_1_1_base_traits.html',1,'cub']]], + ['basetraits_3c_20not_5fa_5fnumber_2c_20false_2c_20false_2c_20t_20_3e',['BaseTraits< NOT_A_NUMBER, false, false, T >',['../structcub_1_1_base_traits.html',1,'cub']]], + ['blockdiscontinuity',['BlockDiscontinuity',['../classcub_1_1_block_discontinuity.html',1,'cub']]], + ['blockexchange',['BlockExchange',['../classcub_1_1_block_exchange.html',1,'cub']]], + ['blockload',['BlockLoad',['../classcub_1_1_block_load.html',1,'cub']]], + ['blockradixsort',['BlockRadixSort',['../classcub_1_1_block_radix_sort.html',1,'cub']]], + ['blockreduce',['BlockReduce',['../classcub_1_1_block_reduce.html',1,'cub']]], + ['blockscan',['BlockScan',['../classcub_1_1_block_scan.html',1,'cub']]], + ['blockstore',['BlockStore',['../classcub_1_1_block_store.html',1,'cub']]] +]; diff --git a/docs/html/search/classes_65.html b/docs/html/search/classes_65.html new file mode 100644 index 0000000000..4f441f9def --- /dev/null +++ b/docs/html/search/classes_65.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/classes_65.js b/docs/html/search/classes_65.js new file mode 100644 index 0000000000..4a2fca4174 --- /dev/null +++ b/docs/html/search/classes_65.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['enableif',['EnableIf',['../structcub_1_1_enable_if.html',1,'cub']]], + ['equality',['Equality',['../structcub_1_1_equality.html',1,'cub']]], + ['equals',['Equals',['../structcub_1_1_equals.html',1,'cub']]] +]; diff --git a/docs/html/search/classes_69.html b/docs/html/search/classes_69.html new file mode 100644 index 0000000000..7a0d013683 --- /dev/null +++ b/docs/html/search/classes_69.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/classes_69.js b/docs/html/search/classes_69.js new file mode 100644 index 0000000000..8b9fcc1a49 --- /dev/null +++ b/docs/html/search/classes_69.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['if',['If',['../structcub_1_1_if.html',1,'cub']]], + ['isvolatile',['IsVolatile',['../structcub_1_1_is_volatile.html',1,'cub']]] +]; diff --git a/docs/html/search/classes_6c.html b/docs/html/search/classes_6c.html new file mode 100644 index 0000000000..a16bb58f62 --- /dev/null +++ b/docs/html/search/classes_6c.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/classes_6c.js b/docs/html/search/classes_6c.js new file mode 100644 index 0000000000..b55816e39b --- /dev/null +++ b/docs/html/search/classes_6c.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['log2',['Log2',['../structcub_1_1_log2.html',1,'cub']]] +]; diff --git a/docs/html/search/classes_6d.html b/docs/html/search/classes_6d.html new file mode 100644 index 0000000000..12b1c839ae --- /dev/null +++ b/docs/html/search/classes_6d.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/classes_6d.js b/docs/html/search/classes_6d.js new file mode 100644 index 0000000000..3432b9e7a8 --- /dev/null +++ b/docs/html/search/classes_6d.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['max',['Max',['../structcub_1_1_max.html',1,'cub']]] +]; diff --git a/docs/html/search/classes_6e.html b/docs/html/search/classes_6e.html new file mode 100644 index 0000000000..a183c15e22 --- /dev/null +++ b/docs/html/search/classes_6e.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/classes_6e.js b/docs/html/search/classes_6e.js new file mode 100644 index 0000000000..8c05a0bd37 --- /dev/null +++ b/docs/html/search/classes_6e.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['nulltype',['NullType',['../structcub_1_1_null_type.html',1,'cub']]], + ['numerictraits',['NumericTraits',['../structcub_1_1_numeric_traits.html',1,'cub']]], + ['numerictraits_3c_20removequalifiers_3c_20t_20_3e_3a_3atype_20_3e',['NumericTraits< RemoveQualifiers< T >::Type >',['../structcub_1_1_numeric_traits.html',1,'cub']]] +]; diff --git a/docs/html/search/classes_72.html b/docs/html/search/classes_72.html new file mode 100644 index 0000000000..03a77208fb --- /dev/null +++ b/docs/html/search/classes_72.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/classes_72.js b/docs/html/search/classes_72.js new file mode 100644 index 0000000000..190feb2666 --- /dev/null +++ b/docs/html/search/classes_72.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['removequalifiers',['RemoveQualifiers',['../structcub_1_1_remove_qualifiers.html',1,'cub']]] +]; diff --git a/docs/html/search/classes_73.html b/docs/html/search/classes_73.html new file mode 100644 index 0000000000..f447c456fe --- /dev/null +++ b/docs/html/search/classes_73.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/classes_73.js b/docs/html/search/classes_73.js new file mode 100644 index 0000000000..47174d1acd --- /dev/null +++ b/docs/html/search/classes_73.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['sum',['Sum',['../structcub_1_1_sum.html',1,'cub']]] +]; diff --git a/docs/html/search/classes_74.html b/docs/html/search/classes_74.html new file mode 100644 index 0000000000..4b0fdaa160 --- /dev/null +++ b/docs/html/search/classes_74.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/classes_74.js b/docs/html/search/classes_74.js new file mode 100644 index 0000000000..cb6fc21918 --- /dev/null +++ b/docs/html/search/classes_74.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['traits',['Traits',['../structcub_1_1_traits.html',1,'cub']]] +]; diff --git a/docs/html/search/classes_77.html b/docs/html/search/classes_77.html new file mode 100644 index 0000000000..dd06de9930 --- /dev/null +++ b/docs/html/search/classes_77.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/classes_77.js b/docs/html/search/classes_77.js new file mode 100644 index 0000000000..3e7deeeae5 --- /dev/null +++ b/docs/html/search/classes_77.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['warpscan',['WarpScan',['../classcub_1_1_warp_scan.html',1,'cub']]] +]; diff --git a/docs/html/search/close.png b/docs/html/search/close.png new file mode 100644 index 0000000000..9342d3dfee Binary files /dev/null and b/docs/html/search/close.png differ diff --git a/docs/html/search/defines_63.html b/docs/html/search/defines_63.html new file mode 100644 index 0000000000..67967fbce3 --- /dev/null +++ b/docs/html/search/defines_63.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/defines_63.js b/docs/html/search/defines_63.js new file mode 100644 index 0000000000..b9a79c5ce3 --- /dev/null +++ b/docs/html/search/defines_63.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['cub_5fhas_5fnested_5ftype',['CUB_HAS_NESTED_TYPE',['../type__utils_8cuh.html#ad785ef798316018015561fda4feec8af',1,'type_utils.cuh']]], + ['cubdebug',['CubDebug',['../debug_8cuh.html#a04236bb0db0efe7a19c9ecba0aedc1e5',1,'debug.cuh']]], + ['cubdebugexit',['CubDebugExit',['../debug_8cuh.html#a2e5de1db78fd84552bda8254efa409a3',1,'debug.cuh']]] +]; diff --git a/docs/html/search/enums_62.html b/docs/html/search/enums_62.html new file mode 100644 index 0000000000..e8fef345b1 --- /dev/null +++ b/docs/html/search/enums_62.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/enums_62.js b/docs/html/search/enums_62.js new file mode 100644 index 0000000000..5025d0a762 --- /dev/null +++ b/docs/html/search/enums_62.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['blockloadpolicy',['BlockLoadPolicy',['../namespacecub.html#a70f1d3c7536d858d49b896e937d25290',1,'cub']]], + ['blockscanpolicy',['BlockScanPolicy',['../namespacecub.html#aa7484021273cbfd89229a6b5c205b9f1',1,'cub']]], + ['blockstorepolicy',['BlockStorePolicy',['../namespacecub.html#aaaa9ee8c8a57c6607909c110affd189e',1,'cub']]] +]; diff --git a/docs/html/search/enums_63.html b/docs/html/search/enums_63.html new file mode 100644 index 0000000000..e8a1e6c81b --- /dev/null +++ b/docs/html/search/enums_63.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/enums_63.js b/docs/html/search/enums_63.js new file mode 100644 index 0000000000..94d5e5b1d7 --- /dev/null +++ b/docs/html/search/enums_63.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['category',['Category',['../namespacecub.html#a4733b6d40e923244502e6f5b200766ef',1,'cub']]] +]; diff --git a/docs/html/search/enums_70.html b/docs/html/search/enums_70.html new file mode 100644 index 0000000000..d737ab2180 --- /dev/null +++ b/docs/html/search/enums_70.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/enums_70.js b/docs/html/search/enums_70.js new file mode 100644 index 0000000000..186c349234 --- /dev/null +++ b/docs/html/search/enums_70.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['ptxloadmodifier',['PtxLoadModifier',['../group___simt_utils.html#ga023420f30fec7d4b187fc98f4fd2a55d',1,'cub']]], + ['ptxstoremodifier',['PtxStoreModifier',['../group___simt_utils.html#gae9c7d6a6af7104f528509182ac9c9da2',1,'cub']]] +]; diff --git a/docs/html/search/enumvalues_62.html b/docs/html/search/enumvalues_62.html new file mode 100644 index 0000000000..ecbeb60860 --- /dev/null +++ b/docs/html/search/enumvalues_62.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/enumvalues_62.js b/docs/html/search/enumvalues_62.js new file mode 100644 index 0000000000..ec2d47019f --- /dev/null +++ b/docs/html/search/enumvalues_62.js @@ -0,0 +1,11 @@ +var searchData= +[ + ['block_5fload_5fdirect',['BLOCK_LOAD_DIRECT',['../namespacecub.html#a70f1d3c7536d858d49b896e937d25290a2d4d8900d7e697e9dac4062e97d3d835',1,'cub']]], + ['block_5fload_5ftranspose',['BLOCK_LOAD_TRANSPOSE',['../namespacecub.html#a70f1d3c7536d858d49b896e937d25290acd94f285472e8f7c883a7407f6f4efc4',1,'cub']]], + ['block_5fload_5fvectorize',['BLOCK_LOAD_VECTORIZE',['../namespacecub.html#a70f1d3c7536d858d49b896e937d25290a826be9d4df1c44c0e5c00a9c9c136965',1,'cub']]], + ['block_5fscan_5fraking',['BLOCK_SCAN_RAKING',['../namespacecub.html#aa7484021273cbfd89229a6b5c205b9f1a0fa6cac57b7df2f475a67af053b9371c',1,'cub']]], + ['block_5fscan_5fwarpscans',['BLOCK_SCAN_WARPSCANS',['../namespacecub.html#aa7484021273cbfd89229a6b5c205b9f1a08bbb9b8f17a4b9e568c1333aeda6324',1,'cub']]], + ['block_5fstore_5fdirect',['BLOCK_STORE_DIRECT',['../namespacecub.html#aaaa9ee8c8a57c6607909c110affd189ea9b8dcc7b6b06bcfc24af4f499523b880',1,'cub']]], + ['block_5fstore_5ftranspose',['BLOCK_STORE_TRANSPOSE',['../namespacecub.html#aaaa9ee8c8a57c6607909c110affd189eab0bbe20613466c3cedfcfea33a97d69c',1,'cub']]], + ['block_5fstore_5fvectorize',['BLOCK_STORE_VECTORIZE',['../namespacecub.html#aaaa9ee8c8a57c6607909c110affd189ea0ccd625a7f2f3649155cbd5a27adfb41',1,'cub']]] +]; diff --git a/docs/html/search/enumvalues_70.html b/docs/html/search/enumvalues_70.html new file mode 100644 index 0000000000..0b609cb11b --- /dev/null +++ b/docs/html/search/enumvalues_70.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/enumvalues_70.js b/docs/html/search/enumvalues_70.js new file mode 100644 index 0000000000..118397b377 --- /dev/null +++ b/docs/html/search/enumvalues_70.js @@ -0,0 +1,16 @@ +var searchData= +[ + ['ptx_5fload_5fca',['PTX_LOAD_CA',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55dad802bce71c7380a911ab0cee5b366fd3',1,'cub']]], + ['ptx_5fload_5fcg',['PTX_LOAD_CG',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55da0e18a5a910be460d738772631eafadd0',1,'cub']]], + ['ptx_5fload_5fcs',['PTX_LOAD_CS',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55da0b263e2237593103d5e9004e935c66af',1,'cub']]], + ['ptx_5fload_5fcv',['PTX_LOAD_CV',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55da05ee1b160fa298ef4b2578a9df1c1350',1,'cub']]], + ['ptx_5fload_5fldg',['PTX_LOAD_LDG',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55dae8ca2d6545712389c0578224f214913d',1,'cub']]], + ['ptx_5fload_5fnone',['PTX_LOAD_NONE',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55da017db24b99abd332be14151d35fa3cf5',1,'cub']]], + ['ptx_5fload_5fvs',['PTX_LOAD_VS',['../group___simt_utils.html#gga023420f30fec7d4b187fc98f4fd2a55dae4cbe986a2413b418ec83e8bb153b990',1,'cub']]], + ['ptx_5fstore_5fcg',['PTX_STORE_CG',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2a95a2bc222f2adce9dd2d0251f53e1d91',1,'cub']]], + ['ptx_5fstore_5fcs',['PTX_STORE_CS',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2ac08bd33e1c4694ccdb899dd9bdef9c96',1,'cub']]], + ['ptx_5fstore_5fnone',['PTX_STORE_NONE',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2a5437dabe5d300b7188dbb42132363c05',1,'cub']]], + ['ptx_5fstore_5fvs',['PTX_STORE_VS',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2adee47f52a9358d88446393c5affd11aa',1,'cub']]], + 
['ptx_5fstore_5fwb',['PTX_STORE_WB',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2a2d57d44c3dbebbae63abcc3ccb80a412',1,'cub']]], + ['ptx_5fstore_5fwt',['PTX_STORE_WT',['../group___simt_utils.html#ggae9c7d6a6af7104f528509182ac9c9da2a8d07fc5099d72afdc46b817d566d3df8',1,'cub']]] +]; diff --git a/docs/html/search/files_62.html b/docs/html/search/files_62.html new file mode 100644 index 0000000000..86dfe39e68 --- /dev/null +++ b/docs/html/search/files_62.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/files_62.js b/docs/html/search/files_62.js new file mode 100644 index 0000000000..d96228005f --- /dev/null +++ b/docs/html/search/files_62.js @@ -0,0 +1,10 @@ +var searchData= +[ + ['block_5fdiscontinuity_2ecuh',['block_discontinuity.cuh',['../block__discontinuity_8cuh.html',1,'']]], + ['block_5fexchange_2ecuh',['block_exchange.cuh',['../block__exchange_8cuh.html',1,'']]], + ['block_5fload_2ecuh',['block_load.cuh',['../block__load_8cuh.html',1,'']]], + ['block_5fradix_5fsort_2ecuh',['block_radix_sort.cuh',['../block__radix__sort_8cuh.html',1,'']]], + ['block_5freduce_2ecuh',['block_reduce.cuh',['../block__reduce_8cuh.html',1,'']]], + ['block_5fscan_2ecuh',['block_scan.cuh',['../block__scan_8cuh.html',1,'']]], + ['block_5fstore_2ecuh',['block_store.cuh',['../block__store_8cuh.html',1,'']]] +]; diff --git a/docs/html/search/files_64.html b/docs/html/search/files_64.html new file mode 100644 index 0000000000..175a900cc5 --- /dev/null +++ b/docs/html/search/files_64.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/files_64.js b/docs/html/search/files_64.js new file mode 100644 index 0000000000..0a441a33f0 --- /dev/null +++ b/docs/html/search/files_64.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['debug_2ecuh',['debug.cuh',['../debug_8cuh.html',1,'']]] +]; diff --git a/docs/html/search/files_6f.html b/docs/html/search/files_6f.html new file mode 100644 index 0000000000..4f9b7bbdba --- /dev/null +++ b/docs/html/search/files_6f.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/files_6f.js b/docs/html/search/files_6f.js new file mode 100644 index 0000000000..5192f4553e --- /dev/null +++ b/docs/html/search/files_6f.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['operators_2ecuh',['operators.cuh',['../operators_8cuh.html',1,'']]] +]; diff --git a/docs/html/search/files_74.html b/docs/html/search/files_74.html new file mode 100644 index 0000000000..985db86908 --- /dev/null +++ b/docs/html/search/files_74.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/files_74.js b/docs/html/search/files_74.js new file mode 100644 index 0000000000..802bcd3b36 --- /dev/null +++ b/docs/html/search/files_74.js @@ -0,0 +1,6 @@ +var searchData= +[ + ['thread_5fload_2ecuh',['thread_load.cuh',['../thread__load_8cuh.html',1,'']]], + ['thread_5fstore_2ecuh',['thread_store.cuh',['../thread__store_8cuh.html',1,'']]], + ['type_5futils_2ecuh',['type_utils.cuh',['../type__utils_8cuh.html',1,'']]] +]; diff --git a/docs/html/search/files_77.html b/docs/html/search/files_77.html new file mode 100644 index 0000000000..63bf92c096 --- /dev/null +++ b/docs/html/search/files_77.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/files_77.js b/docs/html/search/files_77.js new file mode 100644 index 0000000000..8be4ab6ca7 --- /dev/null +++ b/docs/html/search/files_77.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['warp_5fscan_2ecuh',['warp_scan.cuh',['../warp__scan_8cuh.html',1,'']]] +]; diff --git a/docs/html/search/functions_62.html b/docs/html/search/functions_62.html new file mode 100644 index 0000000000..5134d2d296 --- /dev/null +++ b/docs/html/search/functions_62.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/functions_62.js b/docs/html/search/functions_62.js new file mode 100644 index 0000000000..3336b785a3 --- /dev/null +++ b/docs/html/search/functions_62.js @@ -0,0 +1,10 @@ +var searchData= +[ + ['blockedtostriped',['BlockedToStriped',['../classcub_1_1_block_exchange.html#a068f68d3f9d5c53920eeae82594d6935',1,'cub::BlockExchange']]], + ['blockloaddirect',['BlockLoadDirect',['../group___simt_utils.html#ga2ece00cc00c1d3269ee79ddf60d15457',1,'cub::BlockLoadDirect(InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga51495fa39938ecf57056d4ca6f0260de',1,'cub::BlockLoadDirect(InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga01e0a2d42d5b20aab660815c5cf258a0',1,'cub::BlockLoadDirect(InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#gaac537b6a8c9caaae1e6e77e9717e9541',1,'cub::BlockLoadDirect(InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#gae910789e82acd344d6f5a4cc50beef03',1,'cub::BlockLoadDirect(InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#gac20fbd7aaa120e661575fe6e8028a015',1,'cub::BlockLoadDirect(InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD])']]], + ['blockloaddirectstriped',['BlockLoadDirectStriped',['../group___simt_utils.html#ga10442f4a83e49fb4a414ce6ce9234b79',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#ga74f3768367f80c79037b3e77c13bf4bc',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#ga405e4ed36717a6d2c0584578ab94923a',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, const SizeT &guarded_items, 
T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#ga7ba15be704f5aa7c7db809a66af43160',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#gabf20f04ee43adc4661429a7902f71911',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#gaf826ded39a7e107a5f15416d4b147be0',1,'cub::BlockLoadDirectStriped(InputIterator block_itr, const SizeT &guarded_items, T oob_default, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)']]], + ['blockloadvectorized',['BlockLoadVectorized',['../group___simt_utils.html#gaea8200ef976bb588c569e039ea79005c',1,'cub::BlockLoadVectorized(T *block_ptr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#gab1a8ffc7fe70a636a3d09403344cfced',1,'cub::BlockLoadVectorized(T *block_ptr, T(&items)[ITEMS_PER_THREAD])']]], + ['blockstoredirect',['BlockStoreDirect',['../group___simt_utils.html#gaa8f12f02c082f8d689100b8ac88f8f61',1,'cub::BlockStoreDirect(OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga2d52e8ce92c8bc044898cc289a7e96b4',1,'cub::BlockStoreDirect(OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga8b5f82ad8487072b6cc80b312db1962d',1,'cub::BlockStoreDirect(OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga34a623c83894408f4f05ceb788d5ac92',1,'cub::BlockStoreDirect(OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])']]], + ['blockstoredirectstriped',['BlockStoreDirectStriped',['../group___simt_utils.html#gaa18341f23a5d00c1b148e0013a9cc637',1,'cub::BlockStoreDirectStriped(OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int 
stride=blockDim.x)'],['../group___simt_utils.html#gaed26402e843c84978ce85da24819ebeb',1,'cub::BlockStoreDirectStriped(OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#gadcef89bcc6b3c66e1fa1267c15b08a78',1,'cub::BlockStoreDirectStriped(OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)'],['../group___simt_utils.html#ga75150e5519f86c1054d7a7584e1a4f23',1,'cub::BlockStoreDirectStriped(OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD], int stride=blockDim.x)']]], + ['blockstorevectorized',['BlockStoreVectorized',['../group___simt_utils.html#ga013c3ab8214854f45e8d678958e7dde9',1,'cub::BlockStoreVectorized(T *block_ptr, T(&items)[ITEMS_PER_THREAD])'],['../group___simt_utils.html#ga5db0cef20c11ea62aef484c587c4e064',1,'cub::BlockStoreVectorized(T *block_ptr, T(&items)[ITEMS_PER_THREAD])']]] +]; diff --git a/docs/html/search/functions_64.html b/docs/html/search/functions_64.html new file mode 100644 index 0000000000..17149308ab --- /dev/null +++ b/docs/html/search/functions_64.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/functions_64.js b/docs/html/search/functions_64.js new file mode 100644 index 0000000000..a8f34c329c --- /dev/null +++ b/docs/html/search/functions_64.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['debug',['Debug',['../group___host_util.html#ga0ac6d9c9d88ac0da0d644c88a3b36aa3',1,'cub::Debug(cudaError_t error, const char *message, const char *filename, int line)'],['../group___host_util.html#ga5a175d2a88f63f7f1ab30e8b4f2cfa95',1,'cub::Debug(cudaError_t error, const char *filename, int line)']]] +]; diff --git a/docs/html/search/functions_65.html b/docs/html/search/functions_65.html new file mode 100644 index 0000000000..13260cf253 --- /dev/null +++ b/docs/html/search/functions_65.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/functions_65.js b/docs/html/search/functions_65.js new file mode 100644 index 0000000000..c84bbb320c --- /dev/null +++ b/docs/html/search/functions_65.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['exclusivescan',['ExclusiveScan',['../classcub_1_1_warp_scan.html#ab034c0bd94f866b7044d085f0d354e2d',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op)'],['../classcub_1_1_warp_scan.html#a7ad7b67ebb45eae6d120e55206dace8e',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op, T &warp_aggregate)'],['../classcub_1_1_warp_scan.html#af0e55650ffbbb6ad5245c11110fc9343',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)'],['../classcub_1_1_warp_scan.html#ae84a95431640ff2d450c4b0a98dd826e',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op)'],['../classcub_1_1_warp_scan.html#acb0ad5c2aaa0866aa7bcc9a597098daa',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate)'],['../classcub_1_1_warp_scan.html#a182cf61f1437c0ac0e3567a9737fcbfe',1,'cub::WarpScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)'],['../classcub_1_1_block_scan.html#acc948eb8877a6d9956daebf258119b7a',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, const T &identity, ScanOp scan_op, T &block_aggregate)'],['../classcub_1_1_block_scan.html#a64fbe22df260c4731536e1bbcec70cf6',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], const T &identity, ScanOp scan_op, T 
&block_aggregate)'],['../classcub_1_1_block_scan.html#aa858e1cc0cee3e54fc3fb00bc0ecb3ca',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, T identity, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a2a7bf6b9e06e0ed3a71931f1694359f5',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T identity, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a86857a9daede055f69299caff5b16259',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, T identity, ScanOp scan_op)'],['../classcub_1_1_block_scan.html#a2cdb196b18b1d0eb3f7f85a57ed3ac7e',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], const T &identity, ScanOp scan_op)'],['../classcub_1_1_block_scan.html#a1a0090740c3b47eb018831f36d4fe307',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate)'],['../classcub_1_1_block_scan.html#a929f90d956502a7142fa780647241bf0',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate)'],['../classcub_1_1_block_scan.html#aaea795b16f8a66dbbef62952b5f73643',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#afc79e233524e1e357a4cb77c44a46957',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a05c65595bc59cf1bb0fd04965f3b0988',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp 
scan_op)'],['../classcub_1_1_block_scan.html#ac8d2690770ba251c6da988936f248da5',1,'cub::BlockScan::ExclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op)']]], + ['exclusivesum',['ExclusiveSum',['../classcub_1_1_warp_scan.html#a2695420235a1ace8817a595a6f930d61',1,'cub::WarpScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output)'],['../classcub_1_1_warp_scan.html#a2a7c0b9abd940adf1b76e1d5931fcfd7',1,'cub::WarpScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate)'],['../classcub_1_1_warp_scan.html#a8b9720f46d2b9cb920c4eb8a6543fc2c',1,'cub::WarpScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)'],['../classcub_1_1_block_scan.html#a01676b552903e7b5d240bbde7968d55e',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate)'],['../classcub_1_1_block_scan.html#aa4b6abbc17343b897a7b93d581620164',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate)'],['../classcub_1_1_block_scan.html#af0fb65e2f9663daaee32390dee4c786b',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a9adb14da21b88da067e0dae60c628183',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a8e661f683b84c496a0f1bcd96d5bb528',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T input, T &output)'],['../classcub_1_1_block_scan.html#a1414392abb5dc2f60386130ad8ad5130',1,'cub::BlockScan::ExclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD])']]] +]; diff --git a/docs/html/search/functions_66.html 
b/docs/html/search/functions_66.html new file mode 100644 index 0000000000..12565e3b2f --- /dev/null +++ b/docs/html/search/functions_66.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/functions_66.js b/docs/html/search/functions_66.js new file mode 100644 index 0000000000..a09c3361bc --- /dev/null +++ b/docs/html/search/functions_66.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['flag',['Flag',['../classcub_1_1_block_discontinuity.html#ab6390151f109ac253810504ddc5a7c04',1,'cub::BlockDiscontinuity::Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD], T &last_tile_item)'],['../classcub_1_1_block_discontinuity.html#a3bdf3b7ad8ace5249f84e103f25ff3bb',1,'cub::BlockDiscontinuity::Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_discontinuity.html#a7fa4c2dc8bbe5db5da50fedca0613b46',1,'cub::BlockDiscontinuity::Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T tile_predecessor, FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD], T &last_tile_item)'],['../classcub_1_1_block_discontinuity.html#a351ed32eaada93c944fbb29feda5a6cd',1,'cub::BlockDiscontinuity::Flag(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T tile_predecessor, FlagOp flag_op, FlagT(&flags)[ITEMS_PER_THREAD])']]] +]; diff --git a/docs/html/search/functions_69.html b/docs/html/search/functions_69.html new file mode 100644 index 0000000000..9edd1a1c1b --- /dev/null +++ b/docs/html/search/functions_69.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/functions_69.js b/docs/html/search/functions_69.js new file mode 100644 index 0000000000..27182ee283 --- /dev/null +++ b/docs/html/search/functions_69.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['inclusivescan',['InclusiveScan',['../classcub_1_1_warp_scan.html#a9f0397ded5ce89a8750dc8fe10078f3e',1,'cub::WarpScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op)'],['../classcub_1_1_warp_scan.html#a4df11b322777066e9237fc2ef3d257e5',1,'cub::WarpScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate)'],['../classcub_1_1_warp_scan.html#ae5e4f735a2bda14ad6a94a68a0528bd1',1,'cub::WarpScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)'],['../classcub_1_1_block_scan.html#a981f9bae42f6f9c5fe6950698b97d8d4',1,'cub::BlockScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate)'],['../classcub_1_1_block_scan.html#a0b750ea27539a71e46657f3d63fdbce6',1,'cub::BlockScan::InclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate)'],['../classcub_1_1_block_scan.html#a4b987cb649f4aced568b77bd9ac18db6',1,'cub::BlockScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#aae6c7a0cdb8ea21cd7eac0cecacd1ac1',1,'cub::BlockScan::InclusiveScan(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#ac5220b7189e39eb4ff67430f732b1f96',1,'cub::BlockScan::InclusiveScan(SmemStorage &smem_storage, T input, T &output, ScanOp scan_op)'],['../classcub_1_1_block_scan.html#a046dfe9d6daa55a0d9c74d6ce2f7aa5b',1,'cub::BlockScan::InclusiveScan(SmemStorage 
&smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], ScanOp scan_op)']]], + ['inclusivesum',['InclusiveSum',['../classcub_1_1_warp_scan.html#adec85c76d951c326e592e364aa63c728',1,'cub::WarpScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output)'],['../classcub_1_1_warp_scan.html#a78bf6035a0bccc58913dc0ec570c487d',1,'cub::WarpScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate)'],['../classcub_1_1_warp_scan.html#a032ce184b653241719effbd0b5b2dbcd',1,'cub::WarpScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &warp_aggregate, WarpPrefixOp &warp_prefix_op)'],['../classcub_1_1_block_scan.html#a738fa570c0a0e391397c342eaab388cb',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate)'],['../classcub_1_1_block_scan.html#a88a16a5a98fadb09fa216b2d234f0b86',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate)'],['../classcub_1_1_block_scan.html#aed86bb94fe1908673dadbbaec0f95362',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output, T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#a47978bae019da4e99c30519de96534a4',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD], T &block_aggregate, BlockPrefixOp &block_prefix_op)'],['../classcub_1_1_block_scan.html#af4bfc827149cbcfd741e578cfaeee5c7',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T input, T &output)'],['../classcub_1_1_block_scan.html#ae9562dc6cb1e745c8714668dcef3e5b1',1,'cub::BlockScan::InclusiveSum(SmemStorage &smem_storage, T(&input)[ITEMS_PER_THREAD], T(&output)[ITEMS_PER_THREAD])']]] +]; diff --git a/docs/html/search/functions_6c.html b/docs/html/search/functions_6c.html new file mode 100644 index 0000000000..33c0d6dd14 --- /dev/null +++ b/docs/html/search/functions_6c.html 
@@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/functions_6c.js b/docs/html/search/functions_6c.js new file mode 100644 index 0000000000..9155e84810 --- /dev/null +++ b/docs/html/search/functions_6c.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['load',['Load',['../classcub_1_1_block_load.html#ac671e9f033037fc01384a9296684200c',1,'cub::BlockLoad::Load(SmemStorage &smem_storage, InputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_load.html#ae7025d183926de0430146d41b5771032',1,'cub::BlockLoad::Load(SmemStorage &smem_storage, InputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])']]] +]; diff --git a/docs/html/search/functions_6f.html b/docs/html/search/functions_6f.html new file mode 100644 index 0000000000..9d6926417d --- /dev/null +++ b/docs/html/search/functions_6f.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/functions_6f.js b/docs/html/search/functions_6f.js new file mode 100644 index 0000000000..342970781d --- /dev/null +++ b/docs/html/search/functions_6f.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['operator_28_29',['operator()',['../structcub_1_1_equality.html#a89f91d9fabb6b8237f97307ce04f1bab',1,'cub::Equality::operator()()'],['../structcub_1_1_sum.html#a05a1ac22d3e5c852dec8c39724297fe3',1,'cub::Sum::operator()()'],['../structcub_1_1_max.html#a880bd2cf50b320c1771eafe31ebf9ea1',1,'cub::Max::operator()()']]] +]; diff --git a/docs/html/search/functions_72.html b/docs/html/search/functions_72.html new file mode 100644 index 0000000000..71f58bbd14 --- /dev/null +++ b/docs/html/search/functions_72.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/functions_72.js b/docs/html/search/functions_72.js new file mode 100644 index 0000000000..54fa60508c --- /dev/null +++ b/docs/html/search/functions_72.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['reduce',['Reduce',['../classcub_1_1_block_reduce.html#aee16d11eea520e487f387b7d9f2755d2',1,'cub::BlockReduce::Reduce(SmemStorage &smem_storage, T input, ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#acc9a5597731b4985fac1e8a90153d979',1,'cub::BlockReduce::Reduce(SmemStorage &smem_storage, T(&inputs)[ITEMS_PER_THREAD], ReductionOp reduction_op)'],['../classcub_1_1_block_reduce.html#aeddda91425d07c74b819d34ac5b7a0a6',1,'cub::BlockReduce::Reduce(SmemStorage &smem_storage, T input, ReductionOp reduction_op, const unsigned int &valid_threads)']]] +]; diff --git a/docs/html/search/functions_73.html b/docs/html/search/functions_73.html new file mode 100644 index 0000000000..c80660e8c3 --- /dev/null +++ b/docs/html/search/functions_73.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/functions_73.js b/docs/html/search/functions_73.js new file mode 100644 index 0000000000..cbbbbf39a0 --- /dev/null +++ b/docs/html/search/functions_73.js @@ -0,0 +1,11 @@ +var searchData= +[ + ['scattertoblocked',['ScatterToBlocked',['../classcub_1_1_block_exchange.html#a7a429434bbdc3e949a8291461a74bff9',1,'cub::BlockExchange']]], + ['scattertostriped',['ScatterToStriped',['../classcub_1_1_block_exchange.html#a8b8997367d0d42ee7eb600b981f7b72d',1,'cub::BlockExchange']]], + ['sortblocked',['SortBlocked',['../classcub_1_1_block_radix_sort.html#abdbfda59c129946222ab10d2e2e6f6f5',1,'cub::BlockRadixSort::SortBlocked(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)'],['../classcub_1_1_block_radix_sort.html#a7e304558942536fc1636849f8d93d896',1,'cub::BlockRadixSort::SortBlocked(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)']]], + ['sortblockedtostriped',['SortBlockedToStriped',['../classcub_1_1_block_radix_sort.html#ac7fe497d674f5da3062a3d34f010f438',1,'cub::BlockRadixSort::SortBlockedToStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)'],['../classcub_1_1_block_radix_sort.html#ab1a7c9c9e536f13741b4c4f6c369ce80',1,'cub::BlockRadixSort::SortBlockedToStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)']]], + ['sortstriped',['SortStriped',['../classcub_1_1_block_radix_sort.html#a81e081320239182670da329c2b036166',1,'cub::BlockRadixSort::SortStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int 
&end_bit=sizeof(KeyType)*8)'],['../classcub_1_1_block_radix_sort.html#a0c730daecd6cc6f69135ae36d11d5c53',1,'cub::BlockRadixSort::SortStriped(SmemStorage &smem_storage, KeyType(&keys)[ITEMS_PER_THREAD], ValueType(&values)[ITEMS_PER_THREAD], unsigned int begin_bit=0, const unsigned int &end_bit=sizeof(KeyType)*8)']]], + ['store',['Store',['../classcub_1_1_block_store.html#a1fc63a32e80b1275145a469e719f8530',1,'cub::BlockStore::Store(SmemStorage &smem_storage, OutputIterator block_itr, T(&items)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_store.html#a3c1f7f37767338869f44e410a90f2255',1,'cub::BlockStore::Store(SmemStorage &smem_storage, OutputIterator block_itr, const SizeT &guarded_items, T(&items)[ITEMS_PER_THREAD])']]], + ['stripedtoblocked',['StripedToBlocked',['../classcub_1_1_block_exchange.html#ad8000bf73c3ce935018f32451985ae37',1,'cub::BlockExchange']]], + ['sum',['Sum',['../classcub_1_1_block_reduce.html#a136060887c434257984b8bf3f5c62323',1,'cub::BlockReduce::Sum(SmemStorage &smem_storage, T input)'],['../classcub_1_1_block_reduce.html#abe33a10ae2e316943e95d23f1d4d702a',1,'cub::BlockReduce::Sum(SmemStorage &smem_storage, T(&inputs)[ITEMS_PER_THREAD])'],['../classcub_1_1_block_reduce.html#aad43736f8ea38a2c9052059b1d80c7fc',1,'cub::BlockReduce::Sum(SmemStorage &smem_storage, T input, const unsigned int &valid_threads)']]] +]; diff --git a/docs/html/search/functions_74.html b/docs/html/search/functions_74.html new file mode 100644 index 0000000000..1605901ee1 --- /dev/null +++ b/docs/html/search/functions_74.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/functions_74.js b/docs/html/search/functions_74.js new file mode 100644 index 0000000000..1f3526c6fe --- /dev/null +++ b/docs/html/search/functions_74.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['threadload',['ThreadLoad',['../group___simt_utils.html#ga1e390b9fee4c8012a021d49d9b76b1e8',1,'cub']]], + ['threadstore',['ThreadStore',['../group___simt_utils.html#gad117ecb99b9230a032971b0ac08ca6dc',1,'cub']]] +]; diff --git a/docs/html/search/groups_63.html b/docs/html/search/groups_63.html new file mode 100644 index 0000000000..f4ece649de --- /dev/null +++ b/docs/html/search/groups_63.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/groups_63.js b/docs/html/search/groups_63.js new file mode 100644 index 0000000000..fc670c39e0 --- /dev/null +++ b/docs/html/search/groups_63.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['cooperative_20simt_20operations',['Cooperative SIMT Operations',['../group___simt_coop.html',1,'']]] +]; diff --git a/docs/html/search/groups_68.html b/docs/html/search/groups_68.html new file mode 100644 index 0000000000..de9940f747 --- /dev/null +++ b/docs/html/search/groups_68.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/groups_68.js b/docs/html/search/groups_68.js new file mode 100644 index 0000000000..8fcb135372 --- /dev/null +++ b/docs/html/search/groups_68.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['host_20utilities',['Host Utilities',['../group___host_util.html',1,'']]] +]; diff --git a/docs/html/search/groups_73.html b/docs/html/search/groups_73.html new file mode 100644 index 0000000000..d77ec54c86 --- /dev/null +++ b/docs/html/search/groups_73.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/groups_73.js b/docs/html/search/groups_73.js new file mode 100644 index 0000000000..7a92113dba --- /dev/null +++ b/docs/html/search/groups_73.js @@ -0,0 +1,5 @@ +var searchData= +[ + ['simt_20primitives',['SIMT Primitives',['../group___simt.html',1,'']]], + ['simt_20utilities',['SIMT Utilities',['../group___simt_utils.html',1,'']]] +]; diff --git a/docs/html/search/mag_sel.png b/docs/html/search/mag_sel.png new file mode 100644 index 0000000000..81f6040a20 Binary files /dev/null and b/docs/html/search/mag_sel.png differ diff --git a/docs/html/search/namespaces_63.html b/docs/html/search/namespaces_63.html new file mode 100644 index 0000000000..a6ca16abdd --- /dev/null +++ b/docs/html/search/namespaces_63.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/namespaces_63.js b/docs/html/search/namespaces_63.js new file mode 100644 index 0000000000..3567b32a4a --- /dev/null +++ b/docs/html/search/namespaces_63.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['cub',['cub',['../namespacecub.html',1,'']]] +]; diff --git a/docs/html/search/nomatches.html b/docs/html/search/nomatches.html new file mode 100644 index 0000000000..b1ded27e9a --- /dev/null +++ b/docs/html/search/nomatches.html @@ -0,0 +1,12 @@ + + + + + + + +
+
No Matches
+
+ + diff --git a/docs/html/search/pages_62.html b/docs/html/search/pages_62.html new file mode 100644 index 0000000000..ea5205842a --- /dev/null +++ b/docs/html/search/pages_62.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/pages_62.js b/docs/html/search/pages_62.js new file mode 100644 index 0000000000..3dcb789fb8 --- /dev/null +++ b/docs/html/search/pages_62.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['bibliographic_20references',['Bibliographic References',['../citelist.html',1,'']]] +]; diff --git a/docs/html/search/search.css b/docs/html/search/search.css new file mode 100644 index 0000000000..5b208eddd8 --- /dev/null +++ b/docs/html/search/search.css @@ -0,0 +1,271 @@ +/*---------------- Search Box */ + +#FSearchBox { + float: left; +} + +#MSearchBox { + white-space : nowrap; + position: absolute; + float: none; + display: inline; + margin-top: 8px; + right: 0px; + width: 170px; + z-index: 102; + background-color: white; +} + +#MSearchBox .left +{ + display:block; + position:absolute; + left:10px; + width:20px; + height:19px; + background:url('search_l.png') no-repeat; + background-position:right; +} + +#MSearchSelect { + display:block; + position:absolute; + width:20px; + height:19px; +} + +.left #MSearchSelect { + left:4px; +} + +.right #MSearchSelect { + right:5px; +} + +#MSearchField { + display:block; + position:absolute; + height:19px; + background:url('search_m.png') repeat-x; + border:none; + width:116px; + margin-left:20px; + padding-left:4px; + color: #909090; + outline: none; + font: 9pt Arial, Verdana, sans-serif; +} + +#FSearchBox #MSearchField { + margin-left:15px; +} + +#MSearchBox .right { + display:block; + position:absolute; + right:10px; + top:0px; + width:20px; + height:19px; + background:url('search_r.png') no-repeat; + background-position:left; +} + +#MSearchClose { + display: none; + position: absolute; + top: 4px; + background : none; + border: none; + margin: 0px 4px 0px 0px; + padding: 0px 0px; + outline: none; +} + +.left #MSearchClose { + left: 6px; +} + +.right #MSearchClose { + right: 2px; +} + +.MSearchBoxActive #MSearchField { + color: #000000; +} + +/*---------------- Search filter selection */ + 
+#MSearchSelectWindow { + display: none; + position: absolute; + left: 0; top: 0; + border: 1px solid #90A5CE; + background-color: #F9FAFC; + z-index: 1; + padding-top: 4px; + padding-bottom: 4px; + -moz-border-radius: 4px; + -webkit-border-top-left-radius: 4px; + -webkit-border-top-right-radius: 4px; + -webkit-border-bottom-left-radius: 4px; + -webkit-border-bottom-right-radius: 4px; + -webkit-box-shadow: 5px 5px 5px rgba(0, 0, 0, 0.15); +} + +.SelectItem { + font: 8pt Arial, Verdana, sans-serif; + padding-left: 2px; + padding-right: 12px; + border: 0px; +} + +span.SelectionMark { + margin-right: 4px; + font-family: monospace; + outline-style: none; + text-decoration: none; +} + +a.SelectItem { + display: block; + outline-style: none; + color: #000000; + text-decoration: none; + padding-left: 6px; + padding-right: 12px; +} + +a.SelectItem:focus, +a.SelectItem:active { + color: #000000; + outline-style: none; + text-decoration: none; +} + +a.SelectItem:hover { + color: #FFFFFF; + background-color: #3D578C; + outline-style: none; + text-decoration: none; + cursor: pointer; + display: block; +} + +/*---------------- Search results window */ + +iframe#MSearchResults { + width: 60ex; + height: 15em; +} + +#MSearchResultsWindow { + display: none; + position: absolute; + left: 0; top: 0; + border: 1px solid #000; + background-color: #EEF1F7; +} + +/* ----------------------------------- */ + + +#SRIndex { + clear:both; + padding-bottom: 15px; +} + +.SREntry { + font-size: 10pt; + padding-left: 1ex; +} + +.SRPage .SREntry { + font-size: 8pt; + padding: 1px 5px; +} + +body.SRPage { + margin: 5px 2px; +} + +.SRChildren { + padding-left: 3ex; padding-bottom: .5em +} + +.SRPage .SRChildren { + display: none; +} + +.SRSymbol { + font-weight: bold; + color: #425E97; + font-family: Arial, Verdana, sans-serif; + text-decoration: none; + outline: none; +} + +a.SRScope { + display: block; + color: #425E97; + font-family: Arial, Verdana, sans-serif; + text-decoration: none; + 
outline: none; +} + +a.SRSymbol:focus, a.SRSymbol:active, +a.SRScope:focus, a.SRScope:active { + text-decoration: underline; +} + +span.SRScope { + padding-left: 4px; +} + +.SRPage .SRStatus { + padding: 2px 5px; + font-size: 8pt; + font-style: italic; +} + +.SRResult { + display: none; +} + +DIV.searchresults { + margin-left: 10px; + margin-right: 10px; +} + +/*---------------- External search page results */ + +.searchresult { + background-color: #F0F3F8; +} + +.pages b { + color: white; + padding: 5px 5px 3px 5px; + background-image: url("../tab_a.png"); + background-repeat: repeat-x; + text-shadow: 0 1px 1px #000000; +} + +.pages { + line-height: 17px; + margin-left: 4px; + text-decoration: none; +} + +.hl { + font-weight: bold; +} + +#searchresults { + margin-bottom: 20px; +} + +.searchpages { + margin-top: 10px; +} + diff --git a/docs/html/search/search.js b/docs/html/search/search.js new file mode 100644 index 0000000000..1c7ea7e152 --- /dev/null +++ b/docs/html/search/search.js @@ -0,0 +1,817 @@ +// Search script generated by doxygen +// Copyright (C) 2009 by Dimitri van Heesch. + +// The code in this file is loosly based on main.js, part of Natural Docs, +// which is Copyright (C) 2003-2008 Greg Valure +// Natural Docs is licensed under the GPL. 
+ +var indexSectionsWithContent = +{ + 0: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000111111011001111101110110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + 1: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110010001001110001110010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + 2: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + 3: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010100000000001000010010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + 4: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010111001001001001110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + 5: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + 6: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + 7: 
"0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + 8: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + 9: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + 10: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000010000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", + 11: "0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" +}; + +var indexSectionNames = +{ + 0: "all", + 1: "classes", + 2: "namespaces", + 3: "files", + 4: "functions", + 5: "variables", + 6: "typedefs", + 7: "enums", + 8: "enumvalues", + 9: "defines", + 10: "groups", + 11: "pages" +}; + +function convertToId(search) +{ + var result = ''; + for (i=0;i do a search + { + this.Search(); + } + } + + this.OnSearchSelectKey = function(evt) + { + var e = (evt) ? 
evt : window.event; // for IE + if (e.keyCode==40 && this.searchIndex0) // Up + { + this.searchIndex--; + this.OnSelectItem(this.searchIndex); + } + else if (e.keyCode==13 || e.keyCode==27) + { + this.OnSelectItem(this.searchIndex); + this.CloseSelectionWindow(); + this.DOMSearchField().focus(); + } + return false; + } + + // --------- Actions + + // Closes the results window. + this.CloseResultsWindow = function() + { + this.DOMPopupSearchResultsWindow().style.display = 'none'; + this.DOMSearchClose().style.display = 'none'; + this.Activate(false); + } + + this.CloseSelectionWindow = function() + { + this.DOMSearchSelectWindow().style.display = 'none'; + } + + // Performs a search. + this.Search = function() + { + this.keyTimeout = 0; + + // strip leading whitespace + var searchValue = this.DOMSearchField().value.replace(/^ +/, ""); + + var code = searchValue.toLowerCase().charCodeAt(0); + var hexCode; + if (code<16) + { + hexCode="0"+code.toString(16); + } + else + { + hexCode=code.toString(16); + } + + var resultsPage; + var resultsPageWithSearch; + var hasResultsPage; + + if (indexSectionsWithContent[this.searchIndex].charAt(code) == '1') + { + resultsPage = this.resultsPath + '/' + indexSectionNames[this.searchIndex] + '_' + hexCode + '.html'; + resultsPageWithSearch = resultsPage+'?'+escape(searchValue); + hasResultsPage = true; + } + else // nothing available for this search term + { + resultsPage = this.resultsPath + '/nomatches.html'; + resultsPageWithSearch = resultsPage; + hasResultsPage = false; + } + + window.frames.MSearchResults.location = resultsPageWithSearch; + var domPopupSearchResultsWindow = this.DOMPopupSearchResultsWindow(); + + if (domPopupSearchResultsWindow.style.display!='block') + { + var domSearchBox = this.DOMSearchBox(); + this.DOMSearchClose().style.display = 'inline'; + if (this.insideFrame) + { + var domPopupSearchResults = this.DOMPopupSearchResults(); + domPopupSearchResultsWindow.style.position = 'relative'; + 
domPopupSearchResultsWindow.style.display = 'block'; + var width = document.body.clientWidth - 8; // the -8 is for IE :-( + domPopupSearchResultsWindow.style.width = width + 'px'; + domPopupSearchResults.style.width = width + 'px'; + } + else + { + var domPopupSearchResults = this.DOMPopupSearchResults(); + var left = getXPos(domSearchBox) + 150; // domSearchBox.offsetWidth; + var top = getYPos(domSearchBox) + 20; // domSearchBox.offsetHeight + 1; + domPopupSearchResultsWindow.style.display = 'block'; + left -= domPopupSearchResults.offsetWidth; + domPopupSearchResultsWindow.style.top = top + 'px'; + domPopupSearchResultsWindow.style.left = left + 'px'; + } + } + + this.lastSearchValue = searchValue; + this.lastResultsPage = resultsPage; + } + + // -------- Activation Functions + + // Activates or deactivates the search panel, resetting things to + // their default values if necessary. + this.Activate = function(isActive) + { + if (isActive || // open it + this.DOMPopupSearchResultsWindow().style.display == 'block' + ) + { + this.DOMSearchBox().className = 'MSearchBoxActive'; + + var searchField = this.DOMSearchField(); + + if (searchField.value == this.searchLabel) // clear "Search" term upon entry + { + searchField.value = ''; + this.searchActive = true; + } + } + else if (!isActive) // directly remove the panel + { + this.DOMSearchBox().className = 'MSearchBoxInactive'; + this.DOMSearchField().value = this.searchLabel; + this.searchActive = false; + this.lastSearchValue = '' + this.lastResultsPage = ''; + } + } +} + +// ----------------------------------------------------------------------- + +// The class that handles everything on the search results page. +function SearchResults(name) +{ + // The number of matches from the last run of . + this.lastMatchCount = 0; + this.lastKey = 0; + this.repeatOn = false; + + // Toggles the visibility of the passed element ID. 
+ this.FindChildElement = function(id) + { + var parentElement = document.getElementById(id); + var element = parentElement.firstChild; + + while (element && element!=parentElement) + { + if (element.nodeName == 'DIV' && element.className == 'SRChildren') + { + return element; + } + + if (element.nodeName == 'DIV' && element.hasChildNodes()) + { + element = element.firstChild; + } + else if (element.nextSibling) + { + element = element.nextSibling; + } + else + { + do + { + element = element.parentNode; + } + while (element && element!=parentElement && !element.nextSibling); + + if (element && element!=parentElement) + { + element = element.nextSibling; + } + } + } + } + + this.Toggle = function(id) + { + var element = this.FindChildElement(id); + if (element) + { + if (element.style.display == 'block') + { + element.style.display = 'none'; + } + else + { + element.style.display = 'block'; + } + } + } + + // Searches for the passed string. If there is no parameter, + // it takes it from the URL query. + // + // Always returns true, since other documents may try to call it + // and that may or may not be possible. + this.Search = function(search) + { + if (!search) // get search word from URL + { + search = window.location.search; + search = search.substring(1); // Remove the leading '?' 
+ search = unescape(search); + } + + search = search.replace(/^ +/, ""); // strip leading spaces + search = search.replace(/ +$/, ""); // strip trailing spaces + search = search.toLowerCase(); + search = convertToId(search); + + var resultRows = document.getElementsByTagName("div"); + var matches = 0; + + var i = 0; + while (i < resultRows.length) + { + var row = resultRows.item(i); + if (row.className == "SRResult") + { + var rowMatchName = row.id.toLowerCase(); + rowMatchName = rowMatchName.replace(/^sr\d*_/, ''); // strip 'sr123_' + + if (search.length<=rowMatchName.length && + rowMatchName.substr(0, search.length)==search) + { + row.style.display = 'block'; + matches++; + } + else + { + row.style.display = 'none'; + } + } + i++; + } + document.getElementById("Searching").style.display='none'; + if (matches == 0) // no results + { + document.getElementById("NoMatches").style.display='block'; + } + else // at least one result + { + document.getElementById("NoMatches").style.display='none'; + } + this.lastMatchCount = matches; + return true; + } + + // return the first item with index index or higher that is visible + this.NavNext = function(index) + { + var focusItem; + while (1) + { + var focusName = 'Item'+index; + focusItem = document.getElementById(focusName); + if (focusItem && focusItem.parentNode.parentNode.style.display=='block') + { + break; + } + else if (!focusItem) // last element + { + break; + } + focusItem=null; + index++; + } + return focusItem; + } + + this.NavPrev = function(index) + { + var focusItem; + while (1) + { + var focusName = 'Item'+index; + focusItem = document.getElementById(focusName); + if (focusItem && focusItem.parentNode.parentNode.style.display=='block') + { + break; + } + else if (!focusItem) // last element + { + break; + } + focusItem=null; + index--; + } + return focusItem; + } + + this.ProcessKeys = function(e) + { + if (e.type == "keydown") + { + this.repeatOn = false; + this.lastKey = e.keyCode; + } + else if (e.type == 
"keypress") + { + if (!this.repeatOn) + { + if (this.lastKey) this.repeatOn = true; + return false; // ignore first keypress after keydown + } + } + else if (e.type == "keyup") + { + this.lastKey = 0; + this.repeatOn = false; + } + return this.lastKey!=0; + } + + this.Nav = function(evt,itemIndex) + { + var e = (evt) ? evt : window.event; // for IE + if (e.keyCode==13) return true; + if (!this.ProcessKeys(e)) return false; + + if (this.lastKey==38) // Up + { + var newIndex = itemIndex-1; + var focusItem = this.NavPrev(newIndex); + if (focusItem) + { + var child = this.FindChildElement(focusItem.parentNode.parentNode.id); + if (child && child.style.display == 'block') // children visible + { + var n=0; + var tmpElem; + while (1) // search for last child + { + tmpElem = document.getElementById('Item'+newIndex+'_c'+n); + if (tmpElem) + { + focusItem = tmpElem; + } + else // found it! + { + break; + } + n++; + } + } + } + if (focusItem) + { + focusItem.focus(); + } + else // return focus to search field + { + parent.document.getElementById("MSearchField").focus(); + } + } + else if (this.lastKey==40) // Down + { + var newIndex = itemIndex+1; + var focusItem; + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem && elem.style.display == 'block') // children visible + { + focusItem = document.getElementById('Item'+itemIndex+'_c0'); + } + if (!focusItem) focusItem = this.NavNext(newIndex); + if (focusItem) focusItem.focus(); + } + else if (this.lastKey==39) // Right + { + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem) elem.style.display = 'block'; + } + else if (this.lastKey==37) // Left + { + var item = document.getElementById('Item'+itemIndex); + var elem = this.FindChildElement(item.parentNode.parentNode.id); + if (elem) elem.style.display = 'none'; + } + else if (this.lastKey==27) // Escape + { + 
parent.searchBox.CloseResultsWindow(); + parent.document.getElementById("MSearchField").focus(); + } + else if (this.lastKey==13) // Enter + { + return true; + } + return false; + } + + this.NavChild = function(evt,itemIndex,childIndex) + { + var e = (evt) ? evt : window.event; // for IE + if (e.keyCode==13) return true; + if (!this.ProcessKeys(e)) return false; + + if (this.lastKey==38) // Up + { + if (childIndex>0) + { + var newIndex = childIndex-1; + document.getElementById('Item'+itemIndex+'_c'+newIndex).focus(); + } + else // already at first child, jump to parent + { + document.getElementById('Item'+itemIndex).focus(); + } + } + else if (this.lastKey==40) // Down + { + var newIndex = childIndex+1; + var elem = document.getElementById('Item'+itemIndex+'_c'+newIndex); + if (!elem) // last child, jump to parent next parent + { + elem = this.NavNext(itemIndex+1); + } + if (elem) + { + elem.focus(); + } + } + else if (this.lastKey==27) // Escape + { + parent.searchBox.CloseResultsWindow(); + parent.document.getElementById("MSearchField").focus(); + } + else if (this.lastKey==13) // Enter + { + return true; + } + return false; + } +} + +function setKeyActions(elem,action) +{ + elem.setAttribute('onkeydown',action); + elem.setAttribute('onkeypress',action); + elem.setAttribute('onkeyup',action); +} + +function setClassAttr(elem,attr) +{ + elem.setAttribute('class',attr); + elem.setAttribute('className',attr); +} + +function createResults() +{ + var results = document.getElementById("SRResults"); + for (var e=0; e + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/typedefs_73.js b/docs/html/search/typedefs_73.js new file mode 100644 index 0000000000..32307ec10b --- /dev/null +++ b/docs/html/search/typedefs_73.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['smemstorage',['SmemStorage',['../classcub_1_1_warp_scan.html#a2bfa864e963cb4965139ac1b6c66d1b7',1,'cub::WarpScan::SmemStorage()'],['../classcub_1_1_block_reduce.html#aea61e2e067e0e2d3fba2b0c8e0f73d8d',1,'cub::BlockReduce::SmemStorage()'],['../classcub_1_1_block_scan.html#abda6008896e2e17b50c7deb0ab320e64',1,'cub::BlockScan::SmemStorage()'],['../classcub_1_1_block_radix_sort.html#a495e63ab526ce35e6dfce9fb5206746c',1,'cub::BlockRadixSort::SmemStorage()'],['../classcub_1_1_block_load.html#a09296fd690f1452df9cae24a037e906a',1,'cub::BlockLoad::SmemStorage()'],['../classcub_1_1_block_store.html#aa80c1691bc7aa80bc38c2797b3a99c24',1,'cub::BlockStore::SmemStorage()'],['../classcub_1_1_block_exchange.html#ad91573946e4abe5ae5e34277ded1c215',1,'cub::BlockExchange::SmemStorage()'],['../classcub_1_1_block_discontinuity.html#a855c92f9c3869909913860fa11e755a4',1,'cub::BlockDiscontinuity::SmemStorage()']]] +]; diff --git a/docs/html/search/typedefs_74.html b/docs/html/search/typedefs_74.html new file mode 100644 index 0000000000..b2f6d2a098 --- /dev/null +++ b/docs/html/search/typedefs_74.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/typedefs_74.js b/docs/html/search/typedefs_74.js new file mode 100644 index 0000000000..a7b7b7106d --- /dev/null +++ b/docs/html/search/typedefs_74.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['type',['Type',['../structcub_1_1_if.html#af689e9527f56372e66413b65581ded8e',1,'cub::If::Type()'],['../structcub_1_1_remove_qualifiers.html#a9143e196ef5e6a0176b953f677e94671',1,'cub::RemoveQualifiers::Type()'],['../structcub_1_1_enable_if.html#aafd9405b5887d2a6d3553eee0202798a',1,'cub::EnableIf::Type()']]] +]; diff --git a/docs/html/search/variables_63.html b/docs/html/search/variables_63.html new file mode 100644 index 0000000000..422085c127 --- /dev/null +++ b/docs/html/search/variables_63.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/variables_63.js b/docs/html/search/variables_63.js new file mode 100644 index 0000000000..2cfc4afb05 --- /dev/null +++ b/docs/html/search/variables_63.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['category',['CATEGORY',['../structcub_1_1_base_traits.html#a25ff6477c84dc3bd5f4b5e70cd600f09',1,'cub::BaseTraits']]] +]; diff --git a/docs/html/search/variables_76.html b/docs/html/search/variables_76.html new file mode 100644 index 0000000000..8af2374616 --- /dev/null +++ b/docs/html/search/variables_76.html @@ -0,0 +1,26 @@ + + + + + + + + + +
+
Loading...
+
+ +
Searching...
+
No Matches
+ +
+ + diff --git a/docs/html/search/variables_76.js b/docs/html/search/variables_76.js new file mode 100644 index 0000000000..58c39304d9 --- /dev/null +++ b/docs/html/search/variables_76.js @@ -0,0 +1,4 @@ +var searchData= +[ + ['value',['VALUE',['../structcub_1_1_log2.html#ad1923657cb57427a8621f53022590cd2',1,'cub::Log2']]] +]; diff --git a/cub/docs/images/simt_abstraction.png b/docs/html/simt_abstraction.png similarity index 100% rename from cub/docs/images/simt_abstraction.png rename to docs/html/simt_abstraction.png diff --git a/cub/docs/images/sorting_logo.png b/docs/html/sorting_logo.png similarity index 100% rename from cub/docs/images/sorting_logo.png rename to docs/html/sorting_logo.png diff --git a/cub/docs/images/striped.png b/docs/html/striped.png similarity index 100% rename from cub/docs/images/striped.png rename to docs/html/striped.png diff --git a/docs/html/structcub_1_1_array_traits.html b/docs/html/structcub_1_1_array_traits.html new file mode 100644 index 0000000000..2ac4b06ced --- /dev/null +++ b/docs/html/structcub_1_1_array_traits.html @@ -0,0 +1,124 @@ + + + + + + + +CUB: cub::ArrayTraits< ArrayType, LENGTH > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::ArrayTraits< ArrayType, LENGTH > Struct Template Reference
+
+
+

Detailed description

+

template<typename ArrayType, int LENGTH = -1>
+struct cub::ArrayTraits< ArrayType, LENGTH >

+ +

Array traits.

+

The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_base_traits-members.html b/docs/html/structcub_1_1_base_traits-members.html new file mode 100644 index 0000000000..b0887349c0 --- /dev/null +++ b/docs/html/structcub_1_1_base_traits-members.html @@ -0,0 +1,122 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits > Member List
+
+ + + + + + diff --git a/docs/html/structcub_1_1_base_traits.html b/docs/html/structcub_1_1_base_traits.html new file mode 100644 index 0000000000..c1cd723620 --- /dev/null +++ b/docs/html/structcub_1_1_base_traits.html @@ -0,0 +1,143 @@ + + + + + + + +CUB: cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits > Struct Template Reference
+
+
+

Detailed description

+

template<Category _CATEGORY, bool _PRIMITIVE, bool _NULL_TYPE, typename _UnsignedBits>
+struct cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits >

+ +

Basic type traits.

+
+ + + +

+Public Types

enum  { PRIMITIVE = _PRIMITIVE, +NULL_TYPE = _NULL_TYPE + }
 
+ + + + +

+Static Public Members

+static const Category CATEGORY = _CATEGORY
 Category.
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_enable_if-members.html b/docs/html/structcub_1_1_enable_if-members.html new file mode 100644 index 0000000000..a4b67d1665 --- /dev/null +++ b/docs/html/structcub_1_1_enable_if-members.html @@ -0,0 +1,120 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::EnableIf< Condition, T > Member List
+
+
+ +

This is the complete list of members for cub::EnableIf< Condition, T >, including all inherited members.

+ + +
Type typedefcub::EnableIf< Condition, T >
+ + + + + diff --git a/docs/html/structcub_1_1_enable_if.html b/docs/html/structcub_1_1_enable_if.html new file mode 100644 index 0000000000..f27a140767 --- /dev/null +++ b/docs/html/structcub_1_1_enable_if.html @@ -0,0 +1,135 @@ + + + + + + + +CUB: cub::EnableIf< Condition, T > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::EnableIf< Condition, T > Struct Template Reference
+
+
+

Detailed description

+

template<bool Condition, class T = void>
+struct cub::EnableIf< Condition, T >

+ +

Simple enable-if (similar to Boost)

+
+ + + + +

+Public Types

+typedef T Type
 Enable-if type for SFINAE dummy variables.
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_equality-members.html b/docs/html/structcub_1_1_equality-members.html new file mode 100644 index 0000000000..73f2bee291 --- /dev/null +++ b/docs/html/structcub_1_1_equality-members.html @@ -0,0 +1,120 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::Equality< T > Member List
+
+
+ +

This is the complete list of members for cub::Equality< T >, including all inherited members.

+ + +
operator()(const T &a, const T &b)cub::Equality< T >inline
+ + + + + diff --git a/docs/html/structcub_1_1_equality.html b/docs/html/structcub_1_1_equality.html new file mode 100644 index 0000000000..0f3d3a3748 --- /dev/null +++ b/docs/html/structcub_1_1_equality.html @@ -0,0 +1,136 @@ + + + + + + + +CUB: cub::Equality< T > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::Equality< T > Struct Template Reference
+
+
+

Detailed description

+

template<typename T>
+struct cub::Equality< T >

+ +

Default equality functor.

+
+ + + + +

+Public Methods

+__host__ __device__
+__forceinline__ bool 
operator() (const T &a, const T &b)
 Boolean equality operator, returns (a == b)
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_equals-members.html b/docs/html/structcub_1_1_equals-members.html new file mode 100644 index 0000000000..d66c06019d --- /dev/null +++ b/docs/html/structcub_1_1_equals-members.html @@ -0,0 +1,121 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::Equals< A, B > Member List
+
+
+ +

This is the complete list of members for cub::Equals< A, B >, including all inherited members.

+ + + +
NEGATE enum value (defined in cub::Equals< A, B >)cub::Equals< A, B >
VALUE enum value (defined in cub::Equals< A, B >)cub::Equals< A, B >
+ + + + + diff --git a/docs/html/structcub_1_1_equals.html b/docs/html/structcub_1_1_equals.html new file mode 100644 index 0000000000..011bff37d4 --- /dev/null +++ b/docs/html/structcub_1_1_equals.html @@ -0,0 +1,135 @@ + + + + + + + +CUB: cub::Equals< A, B > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::Equals< A, B > Struct Template Reference
+
+
+

Detailed description

+

template<typename A, typename B>
+struct cub::Equals< A, B >

+ +

Type equality test.

+
+ + + +

+Public Types

enum  { VALUE = 0, +NEGATE = 1 + }
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_if-members.html b/docs/html/structcub_1_1_if-members.html new file mode 100644 index 0000000000..fff13226e7 --- /dev/null +++ b/docs/html/structcub_1_1_if-members.html @@ -0,0 +1,120 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::If< IF, ThenType, ElseType > Member List
+
+
+ +

This is the complete list of members for cub::If< IF, ThenType, ElseType >, including all inherited members.

+ + +
Type typedefcub::If< IF, ThenType, ElseType >
+ + + + + diff --git a/docs/html/structcub_1_1_if.html b/docs/html/structcub_1_1_if.html new file mode 100644 index 0000000000..b74d319216 --- /dev/null +++ b/docs/html/structcub_1_1_if.html @@ -0,0 +1,135 @@ + + + + + + + +CUB: cub::If< IF, ThenType, ElseType > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::If< IF, ThenType, ElseType > Struct Template Reference
+
+
+

Detailed description

+

template<bool IF, typename ThenType, typename ElseType>
+struct cub::If< IF, ThenType, ElseType >

+ +

Type selection (IF ? ThenType : ElseType)

+
+ + + + +

+Public Types

+typedef ThenType Type
 Conditional type result.
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_is_volatile-members.html b/docs/html/structcub_1_1_is_volatile-members.html new file mode 100644 index 0000000000..d53af4b367 --- /dev/null +++ b/docs/html/structcub_1_1_is_volatile-members.html @@ -0,0 +1,120 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::IsVolatile< Tp > Member List
+
+
+ +

This is the complete list of members for cub::IsVolatile< Tp >, including all inherited members.

+ + +
VALUE enum value (defined in cub::IsVolatile< Tp >)cub::IsVolatile< Tp >
+ + + + + diff --git a/docs/html/structcub_1_1_is_volatile.html b/docs/html/structcub_1_1_is_volatile.html new file mode 100644 index 0000000000..519f242133 --- /dev/null +++ b/docs/html/structcub_1_1_is_volatile.html @@ -0,0 +1,134 @@ + + + + + + + +CUB: cub::IsVolatile< Tp > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::IsVolatile< Tp > Struct Template Reference
+
+
+

Detailed description

+

template<typename Tp>
+struct cub::IsVolatile< Tp >

+ +

Volatile modifier test.

+
+ + + +

+Public Types

enum  { VALUE = 0 + }
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_log2-members.html b/docs/html/structcub_1_1_log2-members.html new file mode 100644 index 0000000000..e7cccc6131 --- /dev/null +++ b/docs/html/structcub_1_1_log2-members.html @@ -0,0 +1,120 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::Log2< N, CURRENT_VAL, COUNT > Member List
+
+
+ +

This is the complete list of members for cub::Log2< N, CURRENT_VAL, COUNT >, including all inherited members.

+ + +
VALUEcub::Log2< N, CURRENT_VAL, COUNT >static
+ + + + + diff --git a/docs/html/structcub_1_1_log2.html b/docs/html/structcub_1_1_log2.html new file mode 100644 index 0000000000..45e5ef6dda --- /dev/null +++ b/docs/html/structcub_1_1_log2.html @@ -0,0 +1,136 @@ + + + + + + + +CUB: cub::Log2< N, CURRENT_VAL, COUNT > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::Log2< N, CURRENT_VAL, COUNT > Struct Template Reference
+
+
+

Detailed description

+

template<int N, int CURRENT_VAL = N, int COUNT = 0>
+struct cub::Log2< N, CURRENT_VAL, COUNT >

+ +

Statically determine log2(N), rounded up.

+

For example: Log2<8>::VALUE // 3 Log2<3>::VALUE // 2

+
+ + + + +

+Static Public Members

+static const int VALUE = Log2<N, (CURRENT_VAL >> 1), COUNT + 1>::VALUE
 Static logarithm value.
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_max-members.html b/docs/html/structcub_1_1_max-members.html new file mode 100644 index 0000000000..22cea2bafc --- /dev/null +++ b/docs/html/structcub_1_1_max-members.html @@ -0,0 +1,120 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::Max< T > Member List
+
+
+ +

This is the complete list of members for cub::Max< T >, including all inherited members.

+ + +
operator()(const T &a, const T &b)cub::Max< T >inline
+ + + + + diff --git a/docs/html/structcub_1_1_max.html b/docs/html/structcub_1_1_max.html new file mode 100644 index 0000000000..effbd73321 --- /dev/null +++ b/docs/html/structcub_1_1_max.html @@ -0,0 +1,136 @@ + + + + + + + +CUB: cub::Max< T > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::Max< T > Struct Template Reference
+
+
+

Detailed description

+

template<typename T>
+struct cub::Max< T >

+ +

Default max functor.

+
+ + + + +

+Public Methods

+__host__ __device__
+__forceinline__ T 
operator() (const T &a, const T &b)
 Boolean max operator, returns (a > b) ? a : b
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_null_type.html b/docs/html/structcub_1_1_null_type.html new file mode 100644 index 0000000000..510c424066 --- /dev/null +++ b/docs/html/structcub_1_1_null_type.html @@ -0,0 +1,121 @@ + + + + + + + +CUB: cub::NullType Struct Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::NullType Struct Reference
+
+
+

Detailed description

+

A simple "NULL" marker type.

+

The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_numeric_traits-members.html b/docs/html/structcub_1_1_numeric_traits-members.html new file mode 100644 index 0000000000..027c782222 --- /dev/null +++ b/docs/html/structcub_1_1_numeric_traits-members.html @@ -0,0 +1,122 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::NumericTraits< T > Member List
+
+ + + + + + diff --git a/docs/html/structcub_1_1_numeric_traits.html b/docs/html/structcub_1_1_numeric_traits.html new file mode 100644 index 0000000000..76f032e61b --- /dev/null +++ b/docs/html/structcub_1_1_numeric_traits.html @@ -0,0 +1,147 @@ + + + + + + + +CUB: cub::NumericTraits< T > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::NumericTraits< T > Struct Template Reference
+
+
+

Detailed description

+

template<typename T>
+struct cub::NumericTraits< T >

+ +

Numeric type traits.

+
+Inheritance diagram for cub::NumericTraits< T >:
+
+
+ + +cub::BaseTraits< NOT_A_NUMBER, false, false, T > + +
+ + + + + + + + + +

+Additional Inherited Members

- Public Types inherited from cub::BaseTraits< NOT_A_NUMBER, false, false, T >
enum  
 
- Static Public Members inherited from cub::BaseTraits< NOT_A_NUMBER, false, false, T >
+static const Category CATEGORY
 Category.
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_numeric_traits.png b/docs/html/structcub_1_1_numeric_traits.png new file mode 100644 index 0000000000..39396980c2 Binary files /dev/null and b/docs/html/structcub_1_1_numeric_traits.png differ diff --git a/docs/html/structcub_1_1_remove_qualifiers-members.html b/docs/html/structcub_1_1_remove_qualifiers-members.html new file mode 100644 index 0000000000..099a1e36a4 --- /dev/null +++ b/docs/html/structcub_1_1_remove_qualifiers-members.html @@ -0,0 +1,120 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::RemoveQualifiers< Tp, Up > Member List
+
+
+ +

This is the complete list of members for cub::RemoveQualifiers< Tp, Up >, including all inherited members.

+ + +
Type typedefcub::RemoveQualifiers< Tp, Up >
+ + + + + diff --git a/docs/html/structcub_1_1_remove_qualifiers.html b/docs/html/structcub_1_1_remove_qualifiers.html new file mode 100644 index 0000000000..95ecac96fe --- /dev/null +++ b/docs/html/structcub_1_1_remove_qualifiers.html @@ -0,0 +1,136 @@ + + + + + + + +CUB: cub::RemoveQualifiers< Tp, Up > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::RemoveQualifiers< Tp, Up > Struct Template Reference
+
+
+

Detailed description

+

template<typename Tp, typename Up = Tp>
+struct cub::RemoveQualifiers< Tp, Up >

+ +

Removes const and volatile qualifiers from type Tp.

+

For example: typename RemoveQualifiers<volatile int>::Type // int;

+
+ + + + +

+Public Types

+typedef Up Type
 Type without const and volatile qualifiers.
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_sum-members.html b/docs/html/structcub_1_1_sum-members.html new file mode 100644 index 0000000000..4a513b4903 --- /dev/null +++ b/docs/html/structcub_1_1_sum-members.html @@ -0,0 +1,120 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::Sum< T > Member List
+
+
+ +

This is the complete list of members for cub::Sum< T >, including all inherited members.

+ + +
operator()(const T &a, const T &b)cub::Sum< T >inline
+ + + + + diff --git a/docs/html/structcub_1_1_sum.html b/docs/html/structcub_1_1_sum.html new file mode 100644 index 0000000000..f7ae9a117c --- /dev/null +++ b/docs/html/structcub_1_1_sum.html @@ -0,0 +1,136 @@ + + + + + + + +CUB: cub::Sum< T > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::Sum< T > Struct Template Reference
+
+
+

Detailed description

+

template<typename T>
+struct cub::Sum< T >

+ +

Default sum functor.

+
+ + + + +

+Public Methods

+__host__ __device__
+__forceinline__ T 
operator() (const T &a, const T &b)
 Boolean sum operator, returns a + b
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_traits-members.html b/docs/html/structcub_1_1_traits-members.html new file mode 100644 index 0000000000..51ad3ec906 --- /dev/null +++ b/docs/html/structcub_1_1_traits-members.html @@ -0,0 +1,122 @@ + + + + + + + +CUB: Member List + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+
+
cub::Traits< T > Member List
+
+ + + + + + diff --git a/docs/html/structcub_1_1_traits.html b/docs/html/structcub_1_1_traits.html new file mode 100644 index 0000000000..3e284f8dd6 --- /dev/null +++ b/docs/html/structcub_1_1_traits.html @@ -0,0 +1,148 @@ + + + + + + + +CUB: cub::Traits< T > Struct Template Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + + +
+ +
+ + +
+
+ +
+
cub::Traits< T > Struct Template Reference
+
+
+

Detailed description

+

template<typename T>
+struct cub::Traits< T >

+ +

Type traits.

+
+Inheritance diagram for cub::Traits< T >:
+
+
+ + +cub::NumericTraits< RemoveQualifiers< T >::Type > +cub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type > + +
+ + + + + + + + + +

+Additional Inherited Members

- Public Types inherited from cub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type >
enum  
 
- Static Public Members inherited from cub::BaseTraits< NOT_A_NUMBER, false, false, RemoveQualifiers< T >::Type >
+static const Category CATEGORY
 Category.
 
+
The documentation for this struct was generated from the following file: +
+ + + + + diff --git a/docs/html/structcub_1_1_traits.png b/docs/html/structcub_1_1_traits.png new file mode 100644 index 0000000000..64a84269f4 Binary files /dev/null and b/docs/html/structcub_1_1_traits.png differ diff --git a/docs/html/sync_off.png b/docs/html/sync_off.png new file mode 100644 index 0000000000..3b443fc628 Binary files /dev/null and b/docs/html/sync_off.png differ diff --git a/docs/html/sync_on.png b/docs/html/sync_on.png new file mode 100644 index 0000000000..e08320fb64 Binary files /dev/null and b/docs/html/sync_on.png differ diff --git a/docs/html/tab_a.png b/docs/html/tab_a.png new file mode 100644 index 0000000000..3b725c41c5 Binary files /dev/null and b/docs/html/tab_a.png differ diff --git a/docs/html/tab_b.png b/docs/html/tab_b.png new file mode 100644 index 0000000000..e2b4a8638c Binary files /dev/null and b/docs/html/tab_b.png differ diff --git a/cub/docs/images/tab_b_alt.png b/docs/html/tab_b_alt.png similarity index 100% rename from cub/docs/images/tab_b_alt.png rename to docs/html/tab_b_alt.png diff --git a/docs/html/tab_h.png b/docs/html/tab_h.png new file mode 100644 index 0000000000..fd5cb70548 Binary files /dev/null and b/docs/html/tab_h.png differ diff --git a/docs/html/tab_s.png b/docs/html/tab_s.png new file mode 100644 index 0000000000..ab478c95b6 Binary files /dev/null and b/docs/html/tab_s.png differ diff --git a/docs/html/tabs.css b/docs/html/tabs.css new file mode 100644 index 0000000000..9cf578f23a --- /dev/null +++ b/docs/html/tabs.css @@ -0,0 +1,60 @@ +.tabs, .tabs2, .tabs3 { + background-image: url('tab_b.png'); + width: 100%; + z-index: 101; + font-size: 13px; + font-family: 'Lucida Grande',Geneva,Helvetica,Arial,sans-serif; +} + +.tabs2 { + font-size: 10px; +} +.tabs3 { + font-size: 9px; +} + +.tablist { + margin: 0; + padding: 0; + display: table; +} + +.tablist li { + float: left; + display: table-cell; + background-image: url('tab_b.png'); + line-height: 36px; + list-style: none; +} + +.tablist a { + display: 
block; + padding: 0 20px; + font-weight: bold; + background-image:url('tab_s.png'); + background-repeat:no-repeat; + background-position:right; + color: #283A5D; + text-shadow: 0px 1px 1px rgba(255, 255, 255, 0.9); + text-decoration: none; + outline: none; +} + +.tabs3 .tablist a { + padding: 0 10px; +} + +.tablist a:hover { + background-image: url('tab_h.png'); + background-repeat:repeat-x; + color: #fff; + text-shadow: 0px 1px 1px rgba(0, 0, 0, 1.0); + text-decoration: none; +} + +.tablist li.current a { + background-image: url('tab_a.png'); + background-repeat:repeat-x; + color: #fff; + text-shadow: 0px 1px 1px rgba(0, 0, 0, 1.0); +} diff --git a/docs/html/thread__load_8cuh.html b/docs/html/thread__load_8cuh.html new file mode 100644 index 0000000000..df835ca18f --- /dev/null +++ b/docs/html/thread__load_8cuh.html @@ -0,0 +1,151 @@ + + + + + + + +CUB: thread_load.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
thread_load.cuh File Reference
+
+
+
#include <cuda.h>
+#include <iterator>
+#include "../ptx_intrinsics.cuh"
+#include "../type_utils.cuh"
+#include "../ns_wrapper.cuh"
+
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+ + + + +

+Enumerations

enum  cub::PtxLoadModifier {
+  cub::PTX_LOAD_NONE, +cub::PTX_LOAD_CA, +cub::PTX_LOAD_CG, +cub::PTX_LOAD_CS, +
+  cub::PTX_LOAD_CV, +cub::PTX_LOAD_LDG, +cub::PTX_LOAD_VS +
+ }
 Enumeration of PTX cache-modifiers for memory load operations. More...
 
+ + + + + + +

+Functions

Thread utilities for memory I/O using PTX cache modifiers
template<PtxLoadModifier MODIFIER, typename InputIterator >
__device__ __forceinline__
+std::iterator_traits
+< InputIterator >::value_type 
cub::ThreadLoad (InputIterator itr)
 Thread utility for reading memory using cub::PtxLoadModifier cache modifiers. More...
 
+

Detailed Description

+

Thread utilities for reading memory using PTX cache modifiers.

+
+ + + + + diff --git a/docs/html/thread__store_8cuh.html b/docs/html/thread__store_8cuh.html new file mode 100644 index 0000000000..153c55ea76 --- /dev/null +++ b/docs/html/thread__store_8cuh.html @@ -0,0 +1,147 @@ + + + + + + + +CUB: thread_store.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
thread_store.cuh File Reference
+
+
+
#include <cuda.h>
+#include "../ptx_intrinsics.cuh"
+#include "../type_utils.cuh"
+#include "../ns_wrapper.cuh"
+
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+ + + + +

+Enumerations

enum  cub::PtxStoreModifier {
+  cub::PTX_STORE_NONE, +cub::PTX_STORE_WB, +cub::PTX_STORE_CG, +cub::PTX_STORE_CS, +
+  cub::PTX_STORE_WT, +cub::PTX_STORE_VS +
+ }
 Enumeration of PTX cache-modifiers for memory store operations. More...
 
+ + + + + + +

+Functions

Thread utilities for memory I/O using PTX cache modifiers
template<PtxStoreModifier MODIFIER, typename OutputIterator , typename T >
__device__ __forceinline__ void cub::ThreadStore (OutputIterator itr, const T &val)
 Thread utility for writing memory using cub::PtxStoreModifier cache modifiers. More...
 
+

Detailed Description

+

Thread utilities for writing memory using PTX cache modifiers.

+
+ + + + + diff --git a/cub/docs/images/tile.png b/docs/html/tile.png similarity index 100% rename from cub/docs/images/tile.png rename to docs/html/tile.png diff --git a/cub/docs/images/transpose_logo.png b/docs/html/transpose_logo.png similarity index 100% rename from cub/docs/images/transpose_logo.png rename to docs/html/transpose_logo.png diff --git a/docs/html/type__utils_8cuh.html b/docs/html/type__utils_8cuh.html new file mode 100644 index 0000000000..ebc79c3917 --- /dev/null +++ b/docs/html/type__utils_8cuh.html @@ -0,0 +1,215 @@ + + + + + + + +CUB: type_utils.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
type_utils.cuh File Reference
+
+
+
#include <iostream>
+#include "ns_wrapper.cuh"
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

+Classes

struct  cub::ArrayTraits< ArrayType, LENGTH >
 Array traits. More...
 
struct  cub::BaseTraits< _CATEGORY, _PRIMITIVE, _NULL_TYPE, _UnsignedBits >
 Basic type traits. More...
 
struct  cub::EnableIf< Condition, T >
 Simple enable-if (similar to Boost) More...
 
struct  cub::Equals< A, B >
 Type equality test. More...
 
struct  cub::If< IF, ThenType, ElseType >
 Type selection (IF ? ThenType : ElseType) More...
 
struct  cub::IsVolatile< Tp >
 Volatile modifier test. More...
 
struct  cub::Log2< N, CURRENT_VAL, COUNT >
 Statically determine log2(N), rounded up. More...
 
struct  cub::NullType
 A simple "NULL" marker type. More...
 
struct  cub::NumericTraits< T >
 Numeric type traits. More...
 
struct  cub::RemoveQualifiers< Tp, Up >
 Removes const and volatile qualifiers from type Tp. More...
 
struct  cub::Traits< T >
 Type traits. More...
 
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+ + + +

+Macros

#define CUB_HAS_NESTED_TYPE(detect_struct, nested_type_name)
 
+ + + + +

+Enumerations

enum  cub::Category { NOT_A_NUMBER, +SIGNED_INTEGER, +UNSIGNED_INTEGER, +FLOATING_POINT + }
 Basic type traits categories.
 
+

Detailed Description

+

Common type manipulation (metaprogramming) utilities

+

Macro Definition Documentation

+ +
+
+ + + + + + + + + + + + + + + + + + +
#define CUB_HAS_NESTED_TYPE( detect_struct,
 nested_type_name 
)
+
+Value:
template <typename T> \
+
struct detect_struct \
+
{ \
+
template <typename C> \
+
static char& test(typename C::nested_type_name*); \
+
template <typename> \
+
static int& test(...); \
+
enum \
+
{ \
+
VALUE = sizeof(test<T>(0)) < sizeof(int) \
+
}; \
+
};
+

Allows the definition of structures that will detect the presence of the specified type name within other classes

+ +
+
+
+ + + + + diff --git a/docs/html/warp__scan_8cuh.html b/docs/html/warp__scan_8cuh.html new file mode 100644 index 0000000000..3372a112e6 --- /dev/null +++ b/docs/html/warp__scan_8cuh.html @@ -0,0 +1,137 @@ + + + + + + + +CUB: warp_scan.cuh File Reference + + + + + + + + + + + + +
+
+ + + + + + +
+
CUB +
+
+
+ + + + + + + + +
+ +
+ + +
+
+ +
+
warp_scan.cuh File Reference
+
+
+
#include "../thread/thread_load.cuh"
+#include "../thread/thread_store.cuh"
+#include "../device_props.cuh"
+#include "../type_utils.cuh"
+#include "../operators.cuh"
+#include "../ns_wrapper.cuh"
+
+ + + + +

+Classes

class  cub::WarpScan< T, WARPS, LOGICAL_WARP_THREADS >
 WarpScan provides variants of parallel prefix scan across a CUDA warp.

+
+warp_scan_logo.png +
+.
+ More...
 
+ + + + +

+Namespaces

namespace  cub
 CUB namespace.
 
+

Detailed Description

+

cub::WarpScan provides variants of parallel prefix scan across a CUDA warp.

+
+ + + + + diff --git a/cub/docs/images/warp_scan_logo.png b/docs/html/warp_scan_logo.png similarity index 100% rename from cub/docs/images/warp_scan_logo.png rename to docs/html/warp_scan_logo.png diff --git a/docs/images/block_load_logo.png b/docs/images/block_load_logo.png new file mode 100644 index 0000000000..da484d29ab Binary files /dev/null and b/docs/images/block_load_logo.png differ diff --git a/docs/images/block_reduce.png b/docs/images/block_reduce.png new file mode 100644 index 0000000000..fa3a4a6b15 Binary files /dev/null and b/docs/images/block_reduce.png differ diff --git a/docs/images/block_scan_raking.png b/docs/images/block_scan_raking.png new file mode 100644 index 0000000000..48f6c55087 Binary files /dev/null and b/docs/images/block_scan_raking.png differ diff --git a/docs/images/block_scan_warpscans.png b/docs/images/block_scan_warpscans.png new file mode 100644 index 0000000000..9dd57673f9 Binary files /dev/null and b/docs/images/block_scan_warpscans.png differ diff --git a/docs/images/block_store_logo.png b/docs/images/block_store_logo.png new file mode 100644 index 0000000000..0fedda05a5 Binary files /dev/null and b/docs/images/block_store_logo.png differ diff --git a/docs/images/blocked.png b/docs/images/blocked.png new file mode 100644 index 0000000000..02d0d8464d Binary files /dev/null and b/docs/images/blocked.png differ diff --git a/cub/docs/images/cnp_abstraction.png b/docs/images/cnp_abstraction.png similarity index 100% rename from cub/docs/images/cnp_abstraction.png rename to docs/images/cnp_abstraction.png diff --git a/cub/docs/images/cub.png b/docs/images/cub.png similarity index 100% rename from cub/docs/images/cub.png rename to docs/images/cub.png diff --git a/docs/images/cub_overview.png b/docs/images/cub_overview.png new file mode 100644 index 0000000000..8a27c39f85 Binary files /dev/null and b/docs/images/cub_overview.png differ diff --git a/docs/images/devfun_abstraction.png b/docs/images/devfun_abstraction.png new file mode 
100644 index 0000000000..aa01e213ee Binary files /dev/null and b/docs/images/devfun_abstraction.png differ diff --git a/docs/images/discont_logo.png b/docs/images/discont_logo.png new file mode 100644 index 0000000000..f7b68bcfaa Binary files /dev/null and b/docs/images/discont_logo.png differ diff --git a/docs/images/download-icon.png b/docs/images/download-icon.png new file mode 100644 index 0000000000..529e9c28d1 Binary files /dev/null and b/docs/images/download-icon.png differ diff --git a/docs/images/favicon.ico b/docs/images/favicon.ico new file mode 100644 index 0000000000..c29b6025bf Binary files /dev/null and b/docs/images/favicon.ico differ diff --git a/docs/images/favicon.png b/docs/images/favicon.png new file mode 100644 index 0000000000..48be989564 Binary files /dev/null and b/docs/images/favicon.png differ diff --git a/docs/images/github-icon-747d8b799a48162434b2c0595ba1317e.png b/docs/images/github-icon-747d8b799a48162434b2c0595ba1317e.png new file mode 100644 index 0000000000..0d45984c8a Binary files /dev/null and b/docs/images/github-icon-747d8b799a48162434b2c0595ba1317e.png differ diff --git a/docs/images/groups-icon.png b/docs/images/groups-icon.png new file mode 100644 index 0000000000..25b79f3a7b Binary files /dev/null and b/docs/images/groups-icon.png differ diff --git a/docs/images/kernel_abstraction.png b/docs/images/kernel_abstraction.png new file mode 100644 index 0000000000..aae657c6f2 Binary files /dev/null and b/docs/images/kernel_abstraction.png differ diff --git a/cub/docs/images/kogge_stone_reduction.png b/docs/images/kogge_stone_reduction.png similarity index 100% rename from cub/docs/images/kogge_stone_reduction.png rename to docs/images/kogge_stone_reduction.png diff --git a/docs/images/kogge_stone_scan.png b/docs/images/kogge_stone_scan.png new file mode 100644 index 0000000000..437e5006ec Binary files /dev/null and b/docs/images/kogge_stone_scan.png differ diff --git a/docs/images/raking.png b/docs/images/raking.png new file 
mode 100644 index 0000000000..206014bd63 Binary files /dev/null and b/docs/images/raking.png differ diff --git a/docs/images/reduce_logo.png b/docs/images/reduce_logo.png new file mode 100644 index 0000000000..7c211160f7 Binary files /dev/null and b/docs/images/reduce_logo.png differ diff --git a/docs/images/scan_logo.png b/docs/images/scan_logo.png new file mode 100644 index 0000000000..d5c2ab2562 Binary files /dev/null and b/docs/images/scan_logo.png differ diff --git a/docs/images/simt_abstraction.png b/docs/images/simt_abstraction.png new file mode 100644 index 0000000000..bf6aa33fa4 Binary files /dev/null and b/docs/images/simt_abstraction.png differ diff --git a/cub/docs/images/sorting_logo.jpg b/docs/images/sorting_logo.jpg similarity index 100% rename from cub/docs/images/sorting_logo.jpg rename to docs/images/sorting_logo.jpg diff --git a/docs/images/sorting_logo.png b/docs/images/sorting_logo.png new file mode 100644 index 0000000000..42b7d097f6 Binary files /dev/null and b/docs/images/sorting_logo.png differ diff --git a/docs/images/striped.png b/docs/images/striped.png new file mode 100644 index 0000000000..7827b20fa4 Binary files /dev/null and b/docs/images/striped.png differ diff --git a/docs/images/tab_b_alt.png b/docs/images/tab_b_alt.png new file mode 100644 index 0000000000..e3e58fbaaf Binary files /dev/null and b/docs/images/tab_b_alt.png differ diff --git a/cub/docs/images/thread_data_1.png b/docs/images/thread_data_1.png similarity index 100% rename from cub/docs/images/thread_data_1.png rename to docs/images/thread_data_1.png diff --git a/docs/images/tile.png b/docs/images/tile.png new file mode 100644 index 0000000000..f8dc435017 Binary files /dev/null and b/docs/images/tile.png differ diff --git a/docs/images/transpose_logo.png b/docs/images/transpose_logo.png new file mode 100644 index 0000000000..6488068d71 Binary files /dev/null and b/docs/images/transpose_logo.png differ diff --git a/docs/images/warp_scan_logo.png 
b/docs/images/warp_scan_logo.png new file mode 100644 index 0000000000..8799fb21c1 Binary files /dev/null and b/docs/images/warp_scan_logo.png differ diff --git a/docs/mainpage.dox b/docs/mainpage.dox new file mode 100644 index 0000000000..408117ca5a --- /dev/null +++ b/docs/mainpage.dox @@ -0,0 +1,423 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + + + +/** + * \mainpage + * + * \tableofcontents + * + * \htmlonly + * + *    + * Download CUB! + *
+ * + *    + * Browse or fork CUB at GitHub! + *
+ * + *    + * Join the cub-users discussion forum! + * \endhtmlonly + * + * \section sec0 (1) What is CUB? + * + * \par + * CUB is a library of high-performance parallel primitives and other utilities for + * constructing CUDA kernel software. CUB enhances productivity, performance, and portability + * by providing an abstraction layer over complex + * [block-level] (http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programming-model), + * [warp-level] (http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#hardware-implementation), and + * [thread-level](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#programming-model) operations. + * + * \par + * CUB's primitives are not bound to any particular width of parallelism or to any particular + * data type. This allows them to be flexible and tunable to fit your kernels' needs. + * Thus CUB is [CUDA Unbound](index.html). + * + * \image html cub_overview.png + * + * \par + * Browse our collections of: + * - [Cooperative primitives](group___simt_coop.html), including: + * - Thread block operations (e.g., radix sort, prefix scan, reduction, etc.) + * - Warp operations (e.g., prefix scan) + * - [SIMT utilities](group___simt_utils.html), including: + * - Tile-based I/O utilities (e.g., for performing {vectorized, coalesced} data movement of {blocked, striped} data tiles) + * - Low-level thread I/O using cache-modifiers + * - Abstractions for thread block work distribution (e.g., work-stealing, even-share, etc.) + * - [Host utilities](group___host_util.html), including: + * - Caching allocator for quick management of device temporaries + * - Device reflection + * + * \section sec2 (2) Recent news + * + * \par + * - [CUB v0.9.1](download_cub.html) (03/09/2013). Intial "preview" release. + * CUB is the first durable, high-performance library + * of cooperative block-level, warp-level, and thread-level primitives for CUDA + * kernel programming. More primitives and examples coming soon! 
+ * + * \section sec3 (3) A simple example + * + * \par + * The following code snippet illustrates a simple CUDA kernel for sorting a thread block's data: + * + * \par + * \code + * #include + * + * // An tile-sorting CUDA kernel + * template < + * int BLOCK_THREADS, // Threads per block + * int ITEMS_PER_THREAD, // Items per thread + * typename T> // Numeric data type + * __global__ void TileSortKernel(T *d_in, T *d_out) + * { + * using namespace cub; + * const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; + * + * // Parameterize cub::BlockRadixSort for the parallel execution context + * typedef BlockRadixSort BlockRadixSort; + * + * // Declare the shared memory needed by BlockRadixSort + * __shared__ typename BlockRadixSort::SmemStorage smem_storage; + * + * // A segment of data items per thread + * T data[ITEMS_PER_THREAD]; + * + * // Load a tile of data using vector-load instructions + * BlockLoadVectorized(data, d_in + (blockIdx.x * TILE_SIZE)); + * + * // Sort data in ascending order + * BlockRadixSort::SortBlocked(smem_storage, data); + * + * // Store the sorted tile using vector-store instructions + * BlockStoreVectorized(data, d_out + (blockIdx.x * TILE_SIZE)); + * } + * \endcode + * + * \par + * The cub::BlockRadixSort type performs a cooperative radix sort across the + * thread block's data items. Its implementation is parameterized by the number of threads per block and the aggregate + * data type \p T and is specialized for the underlying architecture. + * + * \par + * Once instantiated, the cub::BlockRadixSort type exposes an opaque cub::BlockRadixSort::SmemStorage + * member type. The thread block uses this storage type to allocate the shared memory needed by the + * primitive. This storage type can be aliased or union'd with other types so that the + * shared memory can be reused for other purposes. + * + * \par + * Furthermore, the kernel uses CUB's primitives for vectorizing global + * loads and stores. 
For example, lower-level ld.global.v4.s32 + * [PTX instructions](http://docs.nvidia.com/cuda/parallel-thread-execution) + * will be generated when \p T = \p int and \p ITEMS_PER_THREAD is a multiple of 4. + * + * \section sec4 (4) Why do you need CUB? + * + * \par + * CUDA kernel software is where the complexity of parallelism is expressed. + * Programmers must reason about deadlock, livelock, synchronization, race conditions, + * shared memory layout, plurality of state, granularity, throughput, latency, + * memory bottlenecks, etc. Constructing and fine-tuning kernel code is perhaps the + * most challenging, time-consuming aspect of CUDA programming. + * + * \par + * However, with the exception of CUB, there are few (if any) software libraries of + * reusable kernel primitives. In the CUDA ecosystem, CUB is unique in this regard. + * As a [SIMT](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#hardware-implementation) + * library and software abstraction layer, CUB provides: + * -# Simplicity of composition. Parallel CUB primitives can be simply sequenced + * together in kernel code. (This convenience is analogous to programming with + * [Thrust](http://thrust.github.com/) primitives in the host program.) + * -# High performance. CUB simplifies high performance kernel development by + * taking care to implement and make available the fastest available algorithms, + * strategies, and techniques. + * -# Performance portability. CUB primitives are specialized to match + * the target hardware. Furthermore, the CUB library continually evolves to accommodate new + * algorithmic developments, hardware instructions, etc. + * -# Simplicity of performance tuning. CUB primitives provide parallel abstractions + * whose performance behavior can be statically tuned. For example, most CUB primitives + * support alternative algorithmic strategies and variable grain sizes (threads per block, + * items per thread, etc.). + * -# Robustness and durability. 
CUB primitives are designed to function properly for + * arbitrary data types and widths of parallelism (not just for the built-in C++ types + * or for powers-of-two threads per block). + * + * \section sec5 (5) Where is CUB positioned in the CUDA ecosystem? + * + * \par + * CUDA's programming model embodies three different levels of program execution, each + * engendering its own abstraction layer in the CUDA software stack (i.e., the "black boxes" + * below): + * + * + * + * + * + * + * + * + * + * + *
+ * \par + * CUDA kernel. A single CPU thread invokes a CUDA kernel to perform + * some data-parallel function. The incorporation of entire kernels (and their + * corresponding invocation stubs) into libraries is the most common form of code reuse for + * CUDA. Libraries of CUDA kernels include the following: + * - [cuBLAS](https://developer.nvidia.com/cublas) + * - [cuFFT](https://developer.nvidia.com/cufft) + * - [cuSPARSE](https://developer.nvidia.com/cusparse) + * - [Thrust](http://thrust.github.com/) + * + * \htmlonly + * + * \endhtmlonly + *
+ * \par + * Thread blocks (SIMT). Each kernel invocation comprises some number of parallel + * threads. Threads are grouped into blocks, and the entire block of threads invokes some cooperative + * function in which they communicate and synchronize with each other. There has historically been very + * little reuse of cooperative SIMT software within CUDA kernel. Libraries of thread-block primitives + * include the following: + * - [CUB](index.html) + * + * \htmlonly + * + * \endhtmlonly + *
+ * \par + * CUDA thread. A single CUDA thread invokes some sequential function. + * This is the finest-grained level of CUDA software abstraction and requires + * no consideration for the scheduling or synchronization of parallel threads. CUDA libraries of + * purely data-parallel functions include the following: + * - [ CUDA Math](http://docs.nvidia.com/cuda/cuda-math-api/index.html), + * [Texture](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#texture-functions), and + * [Atomic](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#atomic-functions) APIs + * - [cuRAND](https://developer.nvidia.com/curand)'s device-code interface + * - [CUB](index.html) + * + * \htmlonly + * + * \endhtmlonly + *
+ * + * + * \section sec6 (6) How does CUB work? + * + * \par + * CUB leverages the following programming idioms: + * -# [C++ templates](index.html#sec3sec1) + * -# [Reflective type structure](index.html#sec3sec2) + * -# [Flexible data mapping](index.html#sec3sec3) + * + * \subsection sec3sec1 6.1    C++ templates + * + * \par + * As a SIMT library, CUB must be flexible enough to accommodate a wide spectrum + * of parallel execution contexts, + * i.e., specific: + * - Data types + * - Widths of parallelism (threads per block) + * - Grain sizes (data items per thread) + * - Underlying architectures (special instructions, warp size, rules for bank conflicts, etc.) + * - Tuning requirements (e.g., latency vs. throughput) + * + * \par + * To provide this flexibility, CUB is implemented as a C++ template library. + * C++ templates are a way to write generic algorithms and data structures. + * There is no need to build CUB separately. You simply \#include the + * cub.cuh header file into your .cu CUDA C++ sources + * and compile with NVIDIA's nvcc compiler. + * + * \subsection sec3sec2 6.2    Reflective type structure + * + * \par + * Cooperation within a thread block requires shared memory for communicating between threads. + * However, the specific size and layout of the memory needed by a given + * primitive will be specific to the details of its parallel execution context (e.g., how + * many threads are calling into it, how many items are processed per thread, etc.). Furthermore, + * this shared memory must be allocated outside of the component itself if it is to be + * reused elsewhere by the thread block. 
+ * + * \par + * \code + * // Parameterize a BlockScan type for use with 128 threads + * // and 4 items per thread + * typedef cub::BlockScan BlockScan; + * + * // Declare shared memory for BlockScan + * __shared__ typename BlockScan::SmemStorage smem_storage; + * + * // A segment of consecutive input items per thread + * int data[4]; + * + * // Obtain data in blocked order + * ... + * + * // Perform an exclusive prefix sum across the tile of data + * BlockScan::ExclusiveSum(smem_storage, data, data); + * + * \endcode + * + * \par + * To address this issue, we encapsulate cooperative procedures within + * reflective type structure (C++ classes). As illustrated in the + * cub::BlockScan example above, these primitives are C++ classes with + * interfaces that expose both: + * - Procedural entrypoints for a block of threads to invoke + * - An opaque shared memory type needed for the operation of those methods + * + * \subsection sec3sec3 6.3    Flexible data mapping + * + * \par + * We often design kernels such that each thread block is assigned a "tile" of data + * items for processing. + * + * \par + * \image html tile.png + *
Tile of eight ordered data items
+ + * \par + * When the tile size equals the thread block size, the + * mapping of data onto threads is straightforward (one datum per thread). + * However, there are often performance advantages for processing more + * than one datum per thread. For these scenarios, CUB primitives + * support the following alternatives for partitioning data items across + * the block of threads: + * + * + * + * + * + * + * + * + *
+ * \par + * - Blocked arrangement. The aggregate tile of items is partitioned + * evenly across threads in "blocked" fashion with threadi + * owning the ith segment of consecutive elements. + * Blocked arrangements are often desirable for algorithmic benefits (where + * long sequences of items can be processed sequentially within each thread). + * + * \par + * \image html blocked.png + *
Blocked arrangement across four threads
(emphasis on items owned by thread0)
+ *
+ * \par + * - Striped arrangement. The aggregate tile of items is partitioned across + * threads in "striped" fashion, i.e., the \p ITEMS_PER_THREAD items owned by + * each thread have logical stride \p BLOCK_THREADS between them. Striped arrangements + * are often desirable for data movement through global memory (where + * [read/write coalescing](http://docs.nvidia.com/cuda/cuda-c-best-practices-guide/#coalesced-access-global-memory) + * is an important performance consideration). + * + * \par + * \image html striped.png + *
Striped arrangement across four threads
(emphasis on items owned by thread0)
+ *
+ * + * \par + * The benefits of processing multiple items per thread (a.k.a., register blocking, granularity coarsening, etc.) include: + * - Algorithmic efficiency. Sequential work over multiple items in + * thread-private registers is cheaper than synchronized, cooperative + * work through shared memory spaces. + * - Data occupancy. The number of items that can be resident on-chip in + * thread-private register storage is often greater than the number of + * schedulable threads. + * - Instruction-level parallelism. Multiple items per thread also + * facilitates greater ILP for improved throughput and utilization. + * + * \par + * Finally, cub::BlockExchange provides operations for converting between blocked + * and striped arrangements. + * + * \section sec7 (7) Contributors + * + * \par + * CUB is developed as an open-source project by [NVIDIA Research](http://research.nvidia.com). + * The primary contributor is [Duane Merrill](http://github.com/dumerrill). + * + * \section sec8 (8) Open Source License + * + * \par + * CUB is available under the "New BSD" open-source license: + * + * \par + * \code + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2013, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * \endcode + * + */ + + +/** + * \defgroup Simt SIMT Primitives + */ + +/** + * \defgroup SimtCoop Cooperative SIMT Operations + * \ingroup Simt + */ + +/** + * \defgroup SimtUtils SIMT Utilities + * \ingroup Simt + */ + +/** + * \defgroup HostUtil Host Utilities + */ diff --git a/cub/docs/references.bib b/docs/references.bib similarity index 100% rename from cub/docs/references.bib rename to docs/references.bib diff --git a/cub/test/.gitignore b/test/.gitignore similarity index 100% rename from cub/test/.gitignore rename to test/.gitignore diff --git a/cub/test/Makefile b/test/Makefile similarity index 100% rename from cub/test/Makefile rename to test/Makefile diff --git a/cub/test/test_allocator.cu b/test/test_allocator.cu similarity index 100% rename from cub/test/test_allocator.cu rename to test/test_allocator.cu diff --git a/cub/test/test_block_load_store.cu b/test/test_block_load_store.cu similarity index 100% rename from cub/test/test_block_load_store.cu rename to test/test_block_load_store.cu diff --git a/cub/test/test_block_radix_sort.cu b/test/test_block_radix_sort.cu similarity index 100% rename from cub/test/test_block_radix_sort.cu rename to test/test_block_radix_sort.cu diff --git 
a/cub/test/test_block_reduce.cu b/test/test_block_reduce.cu similarity index 81% rename from cub/test/test_block_reduce.cu rename to test/test_block_reduce.cu index acde3e0021..daa9825963 100644 --- a/cub/test/test_block_reduce.cu +++ b/test/test_block_reduce.cu @@ -51,21 +51,83 @@ bool g_verbose = false; // Test kernels //--------------------------------------------------------------------- + +/** + * Generic reduction + */ +template < + typename T, + typename ReductionOp, + bool PRIMITIVE = Traits::PRIMITIVE> +struct DeviceTest +{ + template < + typename BlockReduce, + int ITEMS_PER_THREAD> + static __device__ __forceinline__ T Test( + typename BlockReduce::SmemStorage &smem_storage, + T (&data)[ITEMS_PER_THREAD], + ReductionOp &reduction_op) + { + return BlockReduce::Reduce(smem_storage, data, reduction_op); + } + + template < typename BlockReduce> + static __device__ __forceinline__ T Test( + typename BlockReduce::SmemStorage &smem_storage, + T &data, + ReductionOp &reduction_op, + int valid_threads) + { + return BlockReduce::Reduce(smem_storage, data, reduction_op, valid_threads); + } +}; + + +/** + * Sum reduction (only compile for primitive, built-ins only) + */ +template +struct DeviceTest, true> +{ + template < + typename BlockReduce, + int ITEMS_PER_THREAD> + static __device__ __forceinline__ T Test( + typename BlockReduce::SmemStorage &smem_storage, + T (&data)[ITEMS_PER_THREAD], + Sum &reduction_op) + { + return BlockReduce::Sum(smem_storage, data); + } + + template + static __device__ __forceinline__ T Test( + typename BlockReduce::SmemStorage &smem_storage, + T &data, + Sum &reduction_op, + int valid_threads) + { + return BlockReduce::Sum(smem_storage, data, valid_threads); + } +}; + + /** * Test full-tile reduction kernel (where num_elements is an even * multiple of BLOCK_THREADS) */ template < - int BLOCK_THREADS, - int ITEMS_PER_THREAD, - typename T, - typename ReductionOp> + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + typename T, + typename 
ReductionOp> __launch_bounds__ (BLOCK_THREADS, 1) __global__ void FullTileReduceKernel( - T *d_in, - T *d_out, + T *d_in, + T *d_out, ReductionOp reduction_op, - int tiles) + int tiles) { const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; @@ -97,7 +159,7 @@ __global__ void FullTileReduceKernel( block_offset += TILE_SIZE; // Cooperatively reduce the tile's aggregate - T tile_aggregate = BlockReduce::Reduce(smem_storage, data, reduction_op); + T tile_aggregate = DeviceTest::template Test(smem_storage, data, reduction_op); // Reduce threadblock aggregate block_aggregate = reduction_op(block_aggregate, tile_aggregate); @@ -116,13 +178,13 @@ __global__ void FullTileReduceKernel( * Test partial-tile reduction kernel (where num_elements < BLOCK_THREADS) */ template < - int BLOCK_THREADS, - typename T, - typename ReductionOp> + int BLOCK_THREADS, + typename T, + typename ReductionOp> __launch_bounds__ (BLOCK_THREADS, 1) __global__ void PartialTileReduceKernel( - T *d_in, - T *d_out, + T *d_in, + T *d_out, int num_elements, ReductionOp reduction_op) { @@ -142,11 +204,7 @@ __global__ void PartialTileReduceKernel( } // Cooperatively reduce the tile's aggregate - T tile_aggregate = BlockReduce::Reduce( - smem_storage, - partial, - reduction_op, - num_elements); + T tile_aggregate = DeviceTest::template Test(smem_storage, partial, reduction_op, num_elements); // Store data if (threadIdx.x == 0) @@ -192,14 +250,14 @@ void Initialize( */ template < int BLOCK_THREADS, - int ITEMS_PER_THREAD, - typename T, - typename ReductionOp> + int ITEMS_PER_THREAD, + typename T, + typename ReductionOp> void TestFullTile( - int gen_mode, - int tiles, - ReductionOp reduction_op, - char *type_string) + int gen_mode, + int tiles, + ReductionOp reduction_op, + char *type_string) { const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; diff --git a/cub/test/test_block_scan.cu b/test/test_block_scan.cu similarity index 100% rename from cub/test/test_block_scan.cu rename to 
test/test_block_scan.cu diff --git a/cub/test/test_block_serialize.cu b/test/test_block_serialize.cu similarity index 100% rename from cub/test/test_block_serialize.cu rename to test/test_block_serialize.cu diff --git a/cub/test/test_coo_spmv.cu b/test/test_coo_spmv.cu similarity index 100% rename from cub/test/test_coo_spmv.cu rename to test/test_coo_spmv.cu diff --git a/cub/test/test_coo_spmv_double.cu b/test/test_coo_spmv_double.cu similarity index 100% rename from cub/test/test_coo_spmv_double.cu rename to test/test_coo_spmv_double.cu diff --git a/cub/test/test_grid_barrier.cu b/test/test_grid_barrier.cu similarity index 100% rename from cub/test/test_grid_barrier.cu rename to test/test_grid_barrier.cu diff --git a/cub/test/test_util.h b/test/test_util.h similarity index 100% rename from cub/test/test_util.h rename to test/test_util.h diff --git a/cub/test/test_warp_scan.cu b/test/test_warp_scan.cu similarity index 100% rename from cub/test/test_warp_scan.cu rename to test/test_warp_scan.cu