diff --git a/src/ConvNetSharp.Core/ConvNetSharp.Core.Nuget.csproj b/src/ConvNetSharp.Core/ConvNetSharp.Core.Nuget.csproj index 0ccb0329..e9858f42 100644 --- a/src/ConvNetSharp.Core/ConvNetSharp.Core.Nuget.csproj +++ b/src/ConvNetSharp.Core/ConvNetSharp.Core.Nuget.csproj @@ -1,8 +1,9 @@  - netstandard2.1 - + netstandard2.0 + 8.0 + 0.4.14 diff --git a/src/ConvNetSharp.Core/ConvNetSharp.Core.csproj b/src/ConvNetSharp.Core/ConvNetSharp.Core.csproj index 9e4cff38..e1ab2e78 100644 --- a/src/ConvNetSharp.Core/ConvNetSharp.Core.csproj +++ b/src/ConvNetSharp.Core/ConvNetSharp.Core.csproj @@ -1,7 +1,8 @@  - netstandard2.1 + netstandard2.0 + 8.0 diff --git a/src/ConvNetSharp.Core/Net.cs b/src/ConvNetSharp.Core/Net.cs index 449336a5..1f25ba9b 100644 --- a/src/ConvNetSharp.Core/Net.cs +++ b/src/ConvNetSharp.Core/Net.cs @@ -27,7 +27,8 @@ public T GetCostLoss(Volume input, Volume y) { this.Forward(input); - if (this.Layers[^1] is ILastLayer lastLayer) + var n = this.Layers.Count; + if (this.Layers[n - 1] is ILastLayer lastLayer) { lastLayer.Backward(y, out var loss); return loss; @@ -58,7 +59,8 @@ public int[] GetPrediction() { // this is a convenience function for returning the argmax // prediction, assuming the last layer of the net is a softmax - if (!(this.Layers[^1] is SoftmaxLayer softmaxLayer)) + var ln = this.Layers.Count; + if (!(this.Layers[ln - 1] is SoftmaxLayer softmaxLayer)) { throw new Exception("GetPrediction function assumes softmax as last layer of the net!"); } @@ -109,10 +111,11 @@ public void AddLayer(LayerBase layer) if (this.Layers.Count > 0) { - inputWidth = this.Layers[^1].OutputWidth; - inputHeight = this.Layers[^1].OutputHeight; - inputDepth = this.Layers[^1].OutputDepth; - lastLayer = this.Layers[^1]; + var n = this.Layers.Count; + inputWidth = this.Layers[n - 1].OutputWidth; + inputHeight = this.Layers[n - 1].OutputHeight; + inputDepth = this.Layers[n - 1].OutputDepth; + lastLayer = this.Layers[n - 1]; } else if (!(layer is InputLayer)) { diff --git a/src/ConvNetSharp.Flow/ConvNetSharp.Flow.Nuget.csproj b/src/ConvNetSharp.Flow/ConvNetSharp.Flow.Nuget.csproj index 75d59359..15f23971 100644 --- a/src/ConvNetSharp.Flow/ConvNetSharp.Flow.Nuget.csproj +++ b/src/ConvNetSharp.Flow/ConvNetSharp.Flow.Nuget.csproj @@ -1,8 +1,9 @@  - netstandard2.1 - + netstandard2.0 + 8.0 + 0.4.14 diff --git a/src/ConvNetSharp.Flow/ConvNetSharp.Flow.csproj b/src/ConvNetSharp.Flow/ConvNetSharp.Flow.csproj index b8e2222e..05ff697e 100644 --- a/src/ConvNetSharp.Flow/ConvNetSharp.Flow.csproj +++ b/src/ConvNetSharp.Flow/ConvNetSharp.Flow.csproj @@ -1,8 +1,9 @@  - netstandard2.1 - + netstandard2.0 + 8.0 + diff --git a/src/ConvNetSharp.Utils/ConvNetSharp.Utils.Nuget.csproj b/src/ConvNetSharp.Utils/ConvNetSharp.Utils.Nuget.csproj index 3bfc323a..2c32f840 100644 --- a/src/ConvNetSharp.Utils/ConvNetSharp.Utils.Nuget.csproj +++ b/src/ConvNetSharp.Utils/ConvNetSharp.Utils.Nuget.csproj @@ -1,7 +1,8 @@  - netstandard2.1 + netstandard2.0 + 8.0 diff --git a/src/ConvNetSharp.Volume.GPU/ConvNetSharp.Volume.GPU.Nuget.csproj b/src/ConvNetSharp.Volume.GPU/ConvNetSharp.Volume.GPU.Nuget.csproj index 4cf7fc84..432df6d4 100644 --- a/src/ConvNetSharp.Volume.GPU/ConvNetSharp.Volume.GPU.Nuget.csproj +++ b/src/ConvNetSharp.Volume.GPU/ConvNetSharp.Volume.GPU.Nuget.csproj @@ -1,7 +1,8 @@  - netstandard2.1 + netstandard2.0 + 8.0 diff --git a/src/ConvNetSharp.Volume.GPU/ConvNetSharp.Volume.GPU.csproj b/src/ConvNetSharp.Volume.GPU/ConvNetSharp.Volume.GPU.csproj index 15e8bb3f..c6fbf541 100644 --- 
a/src/ConvNetSharp.Volume.GPU/ConvNetSharp.Volume.GPU.csproj +++ b/src/ConvNetSharp.Volume.GPU/ConvNetSharp.Volume.GPU.csproj @@ -1,6 +1,7 @@  - netstandard2.1 + netstandard2.0 + 8.0 true diff --git a/src/ConvNetSharp.Volume.GPU/Double/Volume.cs b/src/ConvNetSharp.Volume.GPU/Double/Volume.cs index 548ccfb2..0e98e70e 100644 --- a/src/ConvNetSharp.Volume.GPU/Double/Volume.cs +++ b/src/ConvNetSharp.Volume.GPU/Double/Volume.cs @@ -288,10 +288,13 @@ public override void Convolution(Volume filters, int xpad, int ypad, int result.Shape.Dimensions[1], result.Shape.Dimensions[0]); - var algo = this._context.CudnnContext.GetConvolutionForwardAlgorithm( - dataDesc, filterDesc, - convolutionDesc, outputDesc, - cudnnConvolutionFwdPreference.PreferFastest, IntPtr.Zero); + var algo = this._context.CudnnContext.FindConvolutionForwardAlgorithm( + dataDesc, + filterDesc, + convolutionDesc, + outputDesc, + 1 + ).First().algo; var workspaceSize = this._context.CudnnContext.GetConvolutionForwardWorkspaceSize( dataDesc, filterDesc, @@ -373,14 +376,24 @@ public override void Convolution(Volume filters, int xpad, int ypad, int filters.Shape.Dimensions[1], filters.Shape.Dimensions[0]); - var filterAlgo = this._context.CudnnContext.GetConvolutionBackwardFilterAlgorithm(dataDesc, dOutputDesc, - convolutionDesc, dfilterDesc, cudnnConvolutionBwdFilterPreference.PreferFastest, IntPtr.Zero); + var filterAlgo = this._context.CudnnContext.FindConvolutionBackwardFilterAlgorithm( + dataDesc, + dOutputDesc, + convolutionDesc, + dfilterDesc, + 1 + ).First().algo; var filterWorkspaceSize = this._context.CudnnContext.GetConvolutionBackwardFilterWorkspaceSize(dataDesc, dOutputDesc, convolutionDesc, dfilterDesc, filterAlgo); filterWorkspaceSize = filterWorkspaceSize == 0 ? new SizeT(1) : filterWorkspaceSize; - var dataAlgo = this._context.CudnnContext.GetConvolutionBackwardDataAlgorithm(filterDesc, dOutputDesc, - convolutionDesc, dDataDesc, cudnnConvolutionBwdDataPreference.PreferFastest, IntPtr.Zero); + var dataAlgo = this._context.CudnnContext.FindConvolutionBackwardDataAlgorithm( + filterDesc, + dOutputDesc, + convolutionDesc, + dDataDesc, + 1 + ).First().algo; var dataWorkspaceSize = this._context.CudnnContext.GetConvolutionBackwardDataWorkspaceSize(dfilterDesc, dOutputDesc, convolutionDesc, dDataDesc, dataAlgo); dataWorkspaceSize = dataWorkspaceSize == 0 ? 
new SizeT(1) : dataWorkspaceSize; diff --git a/src/ConvNetSharp.Volume.GPU/Single/Volume.cs b/src/ConvNetSharp.Volume.GPU/Single/Volume.cs index 29ef7999..754a8ba4 100644 --- a/src/ConvNetSharp.Volume.GPU/Single/Volume.cs +++ b/src/ConvNetSharp.Volume.GPU/Single/Volume.cs @@ -287,10 +287,13 @@ public override void Convolution(Volume filters, int xpad, int ypad, int result.Shape.Dimensions[1], result.Shape.Dimensions[0]); - var algo = this._context.CudnnContext.GetConvolutionForwardAlgorithm( - dataDesc, filterDesc, - convolutionDesc, outputDesc, - cudnnConvolutionFwdPreference.PreferFastest, IntPtr.Zero); + var algo = this._context.CudnnContext.FindConvolutionForwardAlgorithm( + dataDesc, + filterDesc, + convolutionDesc, + outputDesc, + 1 + ).First().algo; var workspaceSize = this._context.CudnnContext.GetConvolutionForwardWorkspaceSize( dataDesc, filterDesc, @@ -379,14 +382,24 @@ public override void Convolution(Volume filters, int xpad, int ypad, int filters.Shape.Dimensions[1], filters.Shape.Dimensions[0]); - var filterAlgo = this._context.CudnnContext.GetConvolutionBackwardFilterAlgorithm(dataDesc, dOutputDesc, - convolutionDesc, dfilterDesc, cudnnConvolutionBwdFilterPreference.PreferFastest, IntPtr.Zero); + var filterAlgo = this._context.CudnnContext.FindConvolutionBackwardFilterAlgorithm( + dataDesc, + dOutputDesc, + convolutionDesc, + dfilterDesc, + 1 + ).First().algo; var filterWorkspaceSize = this._context.CudnnContext.GetConvolutionBackwardFilterWorkspaceSize(dataDesc, dOutputDesc, convolutionDesc, dfilterDesc, filterAlgo); filterWorkspaceSize = filterWorkspaceSize == 0 ? new SizeT(1) : filterWorkspaceSize; - var dataAlgo = this._context.CudnnContext.GetConvolutionBackwardDataAlgorithm(filterDesc, dOutputDesc, - convolutionDesc, dDataDesc, cudnnConvolutionBwdDataPreference.PreferFastest, IntPtr.Zero); + var dataAlgo = this._context.CudnnContext.FindConvolutionBackwardDataAlgorithm( + filterDesc, + dOutputDesc, + convolutionDesc, + dDataDesc, + 1 + ).First().algo; var dataWorkspaceSize = this._context.CudnnContext.GetConvolutionBackwardDataWorkspaceSize(dfilterDesc, dOutputDesc, convolutionDesc, dDataDesc, dataAlgo); dataWorkspaceSize = dataWorkspaceSize == 0 ? new SizeT(1) : dataWorkspaceSize; diff --git a/src/ConvNetSharp.Volume/ConvNetSharp.Volume.csproj b/src/ConvNetSharp.Volume/ConvNetSharp.Volume.csproj index 1f4da1ab..efa484f9 100644 --- a/src/ConvNetSharp.Volume/ConvNetSharp.Volume.csproj +++ b/src/ConvNetSharp.Volume/ConvNetSharp.Volume.csproj @@ -1,7 +1,8 @@  - netstandard2.1 + netstandard2.0 + 8.0 diff --git a/src/external/ManagedCuda/CudaBlas.XML b/src/external/ManagedCuda/CudaBlas.XML index 14b922bc..0ce02089 100644 --- a/src/external/ManagedCuda/CudaBlas.XML +++ b/src/external/ManagedCuda/CudaBlas.XML @@ -21,6 +21,10 @@ + + + + @@ -53,6 +57,14 @@ + + + + + + + + copies n elements from a vector x in CPU memory space to a vector y @@ -163,6 +175,10 @@ , . + + + + @@ -195,6 +211,10 @@ + + + + @@ -311,6 +331,10 @@ + + + + @@ -327,6 +351,14 @@ + + + + + + + + @@ -367,6 +399,10 @@ + + + + @@ -383,6 +419,10 @@ + + + + @@ -391,6 +431,10 @@ + + + + @@ -399,6 +443,10 @@ + + + + @@ -515,6 +563,10 @@ + + + + @@ -531,6 +583,14 @@ + + + + + + + + @@ -571,6 +631,10 @@ + + + + @@ -587,6 +651,10 @@ + + + + @@ -595,6 +663,10 @@ + + + + @@ -603,6 +675,10 @@ + + + + @@ -1139,11 +1215,11 @@ - + - + @@ -1563,11 +1639,11 @@ - + - + @@ -1939,11 +2015,33 @@ + + + + + + + + + + + + + This function copies the vector x into the vector y. 
+ + + + + + + + + This function copies the vector x into the vector y. @@ -2088,6 +2186,18 @@ + + + This function interchanges the elements of vector x and y. + + + + + + + + + This function computes the Euclidean norm of the vector x. @@ -2648,6 +2758,17 @@ + + + This function finds the (smallest) index of the element of the minimum magnitude. + First index starts at 1 (Fortran notation) + + + + + + + This function finds the (smallest) index of the element of the maximum magnitude. @@ -2752,6 +2873,17 @@ + + + This function finds the (smallest) index of the element of the maximum magnitude. + First index starts at 1 (Fortran notation) + + + + + + + This function computes the sum of the absolute values of the elements of vector x. @@ -2775,6 +2907,18 @@ + + + This function computes the sum of the absolute values of the elements of vector x. + + + + + + + + + This function computes the sum of the absolute values of the elements of vector x. @@ -2976,6 +3120,22 @@ Cosine component Sine component + + + This function applies Givens rotation matrix G = |c s; -s c| to vectors x and y. + + + + + + + + + Cosine component + Sine component + + + This function constructs the Givens rotation matrix G = |c s; -s c| that zeros out the second entry of a 2x1 vector (a; b)T @@ -3039,6 +3199,18 @@ Cosine component Sine component + + + This function constructs the Givens rotation matrix G = |c s; -s c| that zeros out the second entry of a 2x1 vector (a; b)T + + + + + Cosine component + Sine component + + + This function constructs the Givens rotation matrix G = |c s; -s c| that zeros out the second entry of a 2x1 vector (a; b)T @@ -3116,6 +3288,28 @@ + + + This function applies the modified Givens transformation H = |h11 h12; h21 h22| to vectors x and y. + The elements h11, h21, h12 and h22 of 2x2 matrix H are stored in param[1], param[2], param[3] and param[4], respectively. + The flag = param[0] defines the following predefined values for the matrix H entries: + flag=-1.0: H = |h11 h12; h21 h22| + flag= 0.0: H = |1.0 h12; h21 1.0| + flag= 1.0: H = |h11 1.0; -1.0 h22| + flag=-2.0: H = |1.0 0.0; 0.0 1.0| + Notice that the values -1.0, 0.0 and 1.0 implied by the flag are not stored in param. + + + + + + + + + + + + This function constructs the modified Givens transformation H = |h11 h12; h21 h22| that zeros out the second entry of a 2x1 vector @@ -3170,6 +3364,30 @@ + + + This function constructs the modified Givens transformation H = |h11 h12; h21 h22| that zeros out the second entry of a 2x1 vector + [sqrt(d1)*x1; sqrt(d2)*y1]. + The elements h11, h21, h12 and h22 of 2x2 matrix H are stored in param[1], param[2], param[3] and param[4], respectively. + The flag = param[0] defines the following predefined values for the matrix H entries: + flag=-1.0: H = |h11 h12; h21 h22| + flag= 0.0: H = |1.0 h12; h21 1.0| + flag= 1.0: H = |h11 1.0; -1.0 h22| + flag=-2.0: H = |1.0 0.0; 0.0 1.0| + Notice that the values -1.0, 0.0 and 1.0 implied by the flag are not stored in param. + + + + + + + + + + + + + This function constructs the modified Givens transformation H = |h11 h12; h21 h22| that zeros out the second entry of a 2x1 vector @@ -6638,7 +6856,7 @@ leading dimension of two-dimensional array used to store each matrix C[i]. number of pointers contained in A, B and C. - + This function performs the matrix-matrix multiplications of an array of matrices. 
where and are scalars, and , and are arrays of pointers to matrices stored @@ -6673,7 +6891,7 @@ - + This function performs the matrix-matrix multiplications of an array of matrices. where and are scalars, and , and are arrays of pointers to matrices stored @@ -8315,6 +8533,11 @@ the upper part of the matrix is filled + + + Full + + The DiagType type indicates whether the main diagonal of the dense matrix is @@ -8374,6 +8597,16 @@ the conjugate transpose operation is selected + + + synonym of ConjugateTranspose + + + + + the conjugate operation is selected + + The PointerMode type indicates whether the scalar values are passed by @@ -8602,6 +8835,85 @@ + + + same as using matching _PEDANTIC compute type when using cublas routine calls or cublasEx() calls with cudaDataType as compute type + + + + + allow accelerating single precision routines using TF32 tensor cores + + + + + flag to force any reductons to use the accumulator type and not output type in case of mixed precision routines with lower size output type + + + + + Enum for compute type + - default types provide best available performance using all available hardware features + and guarantee internal storage precision with at least the same precision and range; + - _PEDANTIC types ensure standard arithmetic and exact specified internal storage format; + - _FAST types allow for some loss of precision to enable higher throughput arithmetic. + + + + + half - default + + + + + half - pedantic + + + + + float - default + + + + + float - pedantic + + + + + float - fast, allows down-converting inputs to half or TF32 + + + + + float - fast, allows down-converting inputs to bfloat16 or TF32 + + + + + float - fast, allows down-converting inputs to TF32 + + + + + double - default + + + + + double - pedantic + + + + + signed 32-bit int - default + + + + + signed 32-bit int - pedantic + + The cublasDataType_t type is an enumerant to specify the data precision. It is used diff --git a/src/external/ManagedCuda/CudaBlas.dll b/src/external/ManagedCuda/CudaBlas.dll index 8787bc7e..878d70b4 100644 Binary files a/src/external/ManagedCuda/CudaBlas.dll and b/src/external/ManagedCuda/CudaBlas.dll differ diff --git a/src/external/ManagedCuda/CudaDNN.XML b/src/external/ManagedCuda/CudaDNN.XML index 63dbd98d..f3e49582 100644 --- a/src/external/ManagedCuda/CudaDNN.XML +++ b/src/external/ManagedCuda/CudaDNN.XML @@ -55,6 +55,101 @@ mode is set to CUDNN_ACTIVATION_CLIPPED_RELU or to specify the alpha coefficient when the activation mode is set to CUDNN_ACTIVATION_ELU. + + + + + + + + An opaque structure holding the description of an activation operation. + + + + + An opaque structure holding the description of an activation operation. + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handle. + + + + + + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handles. + + + + + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handle. + + An opaque structure holding the @@ -200,6 +295,11 @@ This function allows the user to specify the number of groups to be used in the associated convolution. + + + This function allows the user to specify the number of groups to be used in the associated convolution. + + @@ -236,6 +336,11 @@ Math precision. + + + + + This function returns the ctc costs and gradients, given the probabilities and labels. 
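For reference, the batched GEMM entries above describe the standard cuBLAS gemmBatched operation; written out (parameter names per the standard cuBLAS definition), it computes, for each matrix in the batch,

    C[i] = alpha * op(A[i]) * op(B[i]) + beta * C[i],   i = 0, ..., batchCount - 1

where alpha and beta are scalars, A, B and C are arrays of device pointers to matrices, and op(.) applies the per-operand transpose setting.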
@@ -282,6 +387,16 @@ Amount of GPU memory needed as workspace to be able to execute the CTC loss computation with the specified algo. + + + + + + + + + + An opaque structure holding the cuDNN library context. @@ -798,6 +913,44 @@ Pointer to data of the tensor described by the dxDesc descriptor. Data pointer to GPU memory used by this function. It is expected that contents of reserveSpace doe not change between cudnnDropoutForward and cudnnDropoutBackward calls. + + + This function attempts all available cuDNN algorithms for cudnnConvolutionForward, using + user-allocated GPU memory, and outputs performance metrics to a user-allocated array of + cudnnConvolutionFwdAlgoPerf_t. These metrics are written in sorted fashion where the first + element has the lowest compute time. The workspace size should be the largest workspace you + can spare in device memory; the size of this workspace will determine the availablity of + the convolution algorithms. + + Handle to the previously initialized input tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor xDesc. + Handle to a previously initialized filter descriptor. + Data pointer to GPU memory associated with the filter descriptor wDesc. + Previously initialized convolution descriptor. + Handle to the previously initialized output tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor yDesc. The content of this tensor will be overwritten with arbitary values. + The maximum number of elements to be stored in perfResults. + Data pointer to GPU memory that is a necessary workspace for some algorithms. The size of this workspace will determine the availability of algorithms. A nil pointer is considered a workSpace of 0 bytes. + + + + This function attempts all cuDNN algorithms for cudnnConvolutionBackwardFilter, + using user-allocated GPU memory, and outputs performance metrics to a + user-allocated array of cudnnConvolutionBwdFilterAlgoPerf_t. These metrics are + written in sorted fashion where the first element has the lowest compute time. The + workspace size should be the largest workspace you can spare in device memory; the + size of this workspace will determine the availablity of convolution algorithms. + + Handle to the previously initialized input tensor descriptor. + Data pointer to GPU memory associated with the filter descriptor xDesc. + Handle to the previously initialized input differential tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor dyDesc. + Previously initialized convolution descriptor. + Handle to a previously initialized filter descriptor. + Data pointer to GPU memory associated with the filter descriptor dwDesc.The content of this tensor will be overwritten with arbitary values. + The maximum number of elements to be stored in perfResults. + Data pointer to GPU memory that is a necessary workspace for some algorithms. The size of this workspace will determine the availabilty of algorithms. A nil pointer is considered a workSpace of 0 bytes. + This function copies the scaled data from one tensor to another tensor with a different @@ -1261,6 +1414,44 @@ Pointer to data of the tensor described by the dxDesc descriptor. Data pointer to GPU memory used by this function. It is expected that contents of reserveSpace doe not change between cudnnDropoutForward and cudnnDropoutBackward calls. 
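The Volume.cs hunks earlier in this diff replace the removed preference-based GetConvolution*Algorithm calls with the FindConvolution*Algorithm family documented in the hunk above, which benchmarks the available algorithms and returns results sorted by compute time. A minimal sketch of the new forward-path selection, assuming the ManagedCuda wrapper types CudaDNNContext, TensorDescriptor, FilterDescriptor and ConvolutionDescriptor (descriptor creation is omitted and matches the original code):

using System.Linq;
using ManagedCuda.CudaDNN; // namespace assumed for the bundled ManagedCuda wrapper

static class AlgorithmSelectionSketch
{
    // Mirrors the Volume.cs hunks above: benchmark candidates and keep the fastest one.
    public static void PickForwardAlgorithm(
        CudaDNNContext cudnn,
        TensorDescriptor dataDesc,
        FilterDescriptor filterDesc,
        ConvolutionDescriptor convolutionDesc,
        TensorDescriptor outputDesc)
    {
        // requestedAlgoCount = 1: only the fastest result is needed.
        var algo = cudnn.FindConvolutionForwardAlgorithm(
            dataDesc, filterDesc, convolutionDesc, outputDesc, 1).First().algo;

        // The workspace query is unchanged from the old code path; the hunks above
        // bump a zero-byte workspace to one byte before allocating it.
        var workspaceSize = cudnn.GetConvolutionForwardWorkspaceSize(
            dataDesc, filterDesc, convolutionDesc, outputDesc, algo);
    }
}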
+ + + This function attempts all available cuDNN algorithms for cudnnConvolutionForward, using + user-allocated GPU memory, and outputs performance metrics to a user-allocated array of + cudnnConvolutionFwdAlgoPerf_t. These metrics are written in sorted fashion where the first + element has the lowest compute time. The workspace size should be the largest workspace you + can spare in device memory; the size of this workspace will determine the availablity of + the convolution algorithms. + + Handle to the previously initialized input tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor xDesc. + Handle to a previously initialized filter descriptor. + Data pointer to GPU memory associated with the filter descriptor wDesc. + Previously initialized convolution descriptor. + Handle to the previously initialized output tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor yDesc. The content of this tensor will be overwritten with arbitary values. + The maximum number of elements to be stored in perfResults. + Data pointer to GPU memory that is a necessary workspace for some algorithms. The size of this workspace will determine the availability of algorithms. A nil pointer is considered a workSpace of 0 bytes. + + + + This function attempts all cuDNN algorithms for cudnnConvolutionBackwardFilter, + using user-allocated GPU memory, and outputs performance metrics to a + user-allocated array of cudnnConvolutionBwdFilterAlgoPerf_t. These metrics are + written in sorted fashion where the first element has the lowest compute time. The + workspace size should be the largest workspace you can spare in device memory; the + size of this workspace will determine the availablity of convolution algorithms. + + Handle to the previously initialized input tensor descriptor. + Data pointer to GPU memory associated with the filter descriptor xDesc. + Handle to the previously initialized input differential tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor dyDesc. + Previously initialized convolution descriptor. + Handle to a previously initialized filter descriptor. + Data pointer to GPU memory associated with the filter descriptor dwDesc.The content of this tensor will be overwritten with arbitary values. + The maximum number of elements to be stored in perfResults. + Data pointer to GPU memory that is a necessary workspace for some algorithms. The size of this workspace will determine the availabilty of algorithms. A nil pointer is considered a workSpace of 0 bytes. + @@ -1292,26 +1483,6 @@ The maximum number of elements to be stored in perfResults. An array to store performance metrics sorted ascending by compute time. - - - This function serves as a heuristic for obtaining the best suited algorithm for - cudnnConvolutionForward for the given layer specifications. Based on the input - preference, this function will either return the fastest algorithm or the fastest algorithm - within a given memory limit. For an exhaustive search for the fastest algorithm, please - use cudnnFindConvolutionForwardAlgorithm. - - Handle to the previously initialized input tensor descriptor. - Handle to a previously initialized filter descriptor. - Previously initialized convolution descriptor. - Handle to the previously initialized output tensor descriptor. - Enumerant to express the preference criteria in terms of memory - requirement and speed. 
- It is used when enumerant preference is set to - CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT to specify the - maximum amount of GPU memory the user is willing to use as a workspace - Enumerant that specifies which convolution algorithm should be used to - compute the results according to the specified preference - This function serves as a heuristic for obtaining the best suited algorithm for @@ -1356,24 +1527,6 @@ The maximum number of elements to be stored in perfResults. An array to store performance metrics sorted ascending by compute time. - - - This function serves as a heuristic for obtaining the best suited algorithm for - cudnnConvolutionBackwardFilter_v3 for the given layer specifications. Based - on the input preference, this function will either return the fastest algorithm or the - fastest algorithm within a given memory limit. For an exhaustive search for the fastest - algorithm, please use cudnnFindConvolutionBackwardFilterAlgorithm. - - Handle to the previously initialized input tensor descriptor. - Handle to the previously initialized input differential tensor descriptor. - Previously initialized convolution descriptor. - Handle to a previously initialized filter descriptor. - Enumerant to express the preference criteria in terms of memory requirement and speed. - It is to specify the maximum amount of GPU memory the user is willing to - use as a workspace. This is currently a placeholder and is not used. - Enumerant that specifies which convolution algorithm should be used to - compute the results according to the specified preference - This function returns the amount of GPU memory workspace the user needs @@ -1407,40 +1560,6 @@ The maximum number of elements to be stored in perfResults. An array to store performance metrics sorted ascending by compute time. - - - This function serves as a heuristic for obtaining the best suited algorithm for - cudnnConvolutionBackwardData_v3 for the given layer specifications. Based - on the input preference, this function will either return the fastest algorithm or the - fastest algorithm within a given memory limit. For an exhaustive search for the fastest - algorithm, please use cudnnFindConvolutionBackwardDataAlgorithm. - - Handle to a previously initialized filter descriptor. - Handle to the previously initialized input differential tensor descriptor. - Previously initialized convolution descriptor. - Handle to the previously initialized output tensor descriptor. - Enumerant to express the preference criteria in terms of memory - requirement and speed. - It is to specify the maximum amount of GPU memory the user is willing to - use as a workspace. This is currently a placeholder and is not used. - Enumerant that specifies which convolution algorithm should be used to - compute the results according to the specified preference - - - - This function serves as a heuristic for obtaining the best suited algorithm for - cudnnConvolutionBackwardFilter for the given layer specifications.This function - will return all algorithms sorted by expected (based on internal heuristic) relative - performance with fastest being index 0 of perfResults.For an exhaustive search for the - fastest algorithm, please use cudnnFindConvolutionBackwardFilterAlgorithm. - - Handle to the previously initialized input tensor descriptor. - Handle to the previously initialized input differential tensor descriptor. - Previously initialized convolution descriptor. - Handle to a previously initialized filter descriptor. 
- The maximum number of elements to be stored in perfResults. - array to store performance metrics sorted ascending by compute time. - This function serves as a heuristic for obtaining the best suited algorithm for @@ -1554,6 +1673,21 @@ cudnnSetDropoutDescriptor. Pointer to GPU memory that holds random number generator states initialized by a prior call to cudnnSetDropoutDescriptor. + + + Helper function to calculate folding descriptors for dgrad + + + + + Computes y = relu(BN(x) + z). Also accumulates moving averages of mean and inverse variances + + + + + Computes y = relu(BN(x) + z). Also accumulates moving averages of mean and inverse variances + + An CudaDNNException is thrown, if any wrapped call to the cudnn-library does not return . @@ -2348,7 +2482,7 @@ A user-allocated array to store performance metrics sorted ascending by compute time. - + This function attempts all available cuDNN algorithms for cudnnConvolutionForward, using user-allocated GPU memory, and outputs performance metrics to a user-allocated array of @@ -2371,27 +2505,6 @@ Data pointer to GPU memory that is a necessary workspace for some algorithms. The size of this workspace will determine the availability of algorithms. A nil pointer is considered a workSpace of 0 bytes. Specifies the size in bytes of the provided workSpace. - - - This function serves as a heuristic for obtaining the best suited algorithm for - cudnnConvolutionForward for the given layer specifications. Based on the input - preference, this function will either return the fastest algorithm or the fastest algorithm - within a given memory limit. For an exhaustive search for the fastest algorithm, please - use cudnnFindConvolutionForwardAlgorithm. - - Handle to a previously created cuDNN context. - Handle to the previously initialized input tensor descriptor. - Handle to a previously initialized filter descriptor. - Previously initialized convolution descriptor. - Handle to the previously initialized output tensor descriptor. - Enumerant to express the preference criteria in terms of memory - requirement and speed. - It is used when enumerant preference is set to - CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT to specify the - maximum amount of GPU memory the user is willing to use as a workspace - Enumerant that specifies which convolution algorithm should be used to - compute the results according to the specified preference - @@ -2614,7 +2727,7 @@ The number of output elements stored in perfResults. A user-allocated array to store performance metrics sorted ascending by compute time. - + This function attempts all cuDNN algorithms for cudnnConvolutionBackwardFilter, using user-allocated GPU memory, and outputs performance metrics to a @@ -2637,25 +2750,6 @@ Data pointer to GPU memory that is a necessary workspace for some algorithms. The size of this workspace will determine the availabilty of algorithms. A nil pointer is considered a workSpace of 0 bytes. Specifies the size in bytes of the provided workSpace. - - - This function serves as a heuristic for obtaining the best suited algorithm for - cudnnConvolutionBackwardFilter for the given layer specifications. Based - on the input preference, this function will either return the fastest algorithm or the - fastest algorithm within a given memory limit. For an exhaustive search for the fastest - algorithm, please use cudnnFindConvolutionBackwardFilterAlgorithm. - - Handle to a previously created cuDNN context. - Handle to the previously initialized input tensor descriptor. 
- Handle to the previously initialized input differential tensor descriptor. - Previously initialized convolution descriptor. - Handle to a previously initialized filter descriptor. - Enumerant to express the preference criteria in terms of memory requirement and speed. - It is to specify the maximum amount of GPU memory the user is willing to - use as a workspace. This is currently a placeholder and is not used. - Enumerant that specifies which convolution algorithm should be used to - compute the results according to the specified preference - @@ -2772,26 +2866,6 @@ The number of output elements stored in perfResults. A user-allocated array to store performance metrics sorted ascending by compute time. - - - This function serves as a heuristic for obtaining the best suited algorithm for - cudnnConvolutionBackwardData for the given layer specifications. Based - on the input preference, this function will either return the fastest algorithm or the - fastest algorithm within a given memory limit. For an exhaustive search for the fastest - algorithm, please use cudnnFindConvolutionBackwardDataAlgorithm. - - Handle to a previously created cuDNN context. - Handle to a previously initialized filter descriptor. - Handle to the previously initialized input differential tensor descriptor. - Previously initialized convolution descriptor. - Handle to the previously initialized output tensor descriptor. - Enumerant to express the preference criteria in terms of memory - requirement and speed. - It is to specify the maximum amount of GPU memory the user is willing to - use as a workspace. This is currently a placeholder and is not used. - Enumerant that specifies which convolution algorithm should be used to - compute the results according to the specified preference - @@ -3852,7 +3926,7 @@ - + This function computes the gradient of a sampling operation. @@ -3871,7 +3945,7 @@ Pointer to scaling factor (in host memory) used to blend the gradient outputs dgrid with prior value in the destination pointer as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. Data pointer to GPU memory contains the output differential data. - + @@ -4103,22 +4177,6 @@ - - - - - - - - - - - - - - - - @@ -4550,6 +4608,22 @@ + + Create a destination descriptor for cudnnTransformTensor + + + Create an empty tensor transform descriptor + + + Initialize a previously created tensor transform descriptor. + + + Retrieves the values stored in a previously initialized tensor transform + descriptor. + + + Destroys a previously created tensor transform descriptor. + Constants for LRN, #define in cudnn.h @@ -4585,14 +4659,29 @@ MinEpsilon = 1e-5 - + - + Constant values for SEQDATA - - - - + + + + dimension count + + + + + Number of attention weight/bias tensors + + + + + + + + + + @@ -4918,6 +5007,41 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + CUDNN return codes @@ -5199,32 +5323,6 @@ done when applying the filter to the images. - - - cudnnConvolutionFwdPreference is an enumerated type used by - cudnnGetConvolutionForwardAlgorithm() to help the choice of the algorithm used - for the forward convolution. - - - - - In this configuration, the routine cudnnGetConvolutionForwardAlgorithm() is - guaranteed to return an algorithm that does not require any extra workspace to be provided by the - user. - - - - - In this configuration, the routine cudnnGetConvolutionForwardAlgorithm() will - return the fastest algorithm regardless how much workspace is needed to execute it. 
- - - - - In this configuration, the routine cudnnGetConvolutionForwardAlgorithm() will - return the fastest algorithm that fits within the memory limit that the user provided. - - cudnnConvolutionFwdAlgo is an enumerated type that exposes the different @@ -5374,31 +5472,6 @@ Selects the identity function, intended for bypassing the activation step in cudnnConvolutionBiasActivationForward() (need to use CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM). Does not work with cudnnActivationForward() or cudnnActivationBackward(). - - - cudnnConvolutionBwdFilterPreference is an enumerated type used by - cudnnGetConvolutionBackwardFilterAlgorithm() to help the choice of the - algorithm used for the backward filter convolution. - - - - - In this configuration, the routine cudnnGetConvolutionBackwardFilterAlgorithm() - is guaranteed to return an algorithm that does not require any extra workspace to be provided by the user. - - - - - In this configuration, the routine cudnnGetConvolutionBackwardFilterAlgorithm() - will return the fastest algorithm regardless how much workspace is needed to execute it. - - - - - In this configuration, the routine cudnnGetConvolutionBackwardFilterAlgorithm() - will return the fastest algorithm that fits within the memory limit that the user provided. - - cudnnConvolutionBwdFilterAlgo is an enumerated type that exposes the different @@ -5453,32 +5526,6 @@ results are deterministic. - - - cudnnConvolutionBwdDataPreference is an enumerated type used by - cudnnGetConvolutionBackwardDataAlgorithm() to help the choice of the - algorithm used for the backward data convolution. - - - - - In this configuration, the routine cudnnGetConvolutionBackwardDataAlgorithm() - is guaranteed to return an algorithm that does not require any extra workspace to be provided by the - user. - - - - - In this configuration, the routine cudnnGetConvolutionBackwardDataAlgorithm() - will return the fastest algorithm regardless how much workspace is needed to execute it. - - - - - In this configuration, the routine cudnnGetConvolutionBackwardDataAlgorithm() - will return the fastest algorithm that fits within the memory limit that the user provided. - - cudnnConvolutionBwdDataAlgo is an enumerated type that exposes the different @@ -5990,266 +6037,808 @@ - + - An opaque structure holding the - description of a generic n-D dataset. + CUDNN Reorder - + + Fold/unfold transforms - + - For dispose + do batch normalization only - + - Dispose + do batchNorm, then activation - + - For IDisposable + do batchNorm, then elemWiseAdd, then activation - - + - Returns the inner handle. + rnn cell formulas do not use biases - + - This function initializes a previously created dropout descriptor object. If states argument is equal to - NULL, random number generator states won't be initialized, and only dropout value will be set. No other - function should be writing to the memory + rnn cell formulas use one input bias in input GEMM - The probability with which the value from input would be propagated through the dropout layer. - Pointer to user-allocated GPU memory that will hold random number generator states. - Specifies size in bytes of the provided memory for the states. - Seed used to initialize random number generator states. - + - An opaque structure holding the description - of a filter dataset. 
+ default, rnn cell formulas use two bias vectors - + + rnn cell formulas use one recurrent bias in recurrent GEMM - + - For dispose + disables LSTM cell clipping - + - Dispose + enables LSTM cell clipping - + - For IDisposable + padded, outer stride from one time-step to the next - - + - Returns the inner handle. + sequence length sorted and packed as in basic RNN api - + - This function initializes a previously created filter descriptor object into a 4D filter. - Filters layout must be contiguous in memory. + padded, outer stride from one batch to the next - Data type. - Enumerant holding the layout format. - Number of output feature maps. - Number of input feature maps. - Height of each filter. - Width of each filter. - + - This function queries the parameters of the previouly initialized filter descriptor object. + Sequence data descriptor - Data type. - Enumerant holding the layout format. - Number of output feature maps. - Number of input feature maps. - Height of each filter. - Width of each filter. - + - This function initializes a previously created filter descriptor object. Filters layout must - be contiguous in memory. + index in time - Data type. - Enumerant holding the layout format. - Dimension of the filter. - Array of dimension nbDims containing the size of the filter for each dimension. - + - This function queries a previously initialized filter descriptor object. + index in batch - Dimension of the expected filter descriptor. It is also the minimum size of - the arrays filterDimA in order to be able to hold the results - Data type. - Enumerant holding the layout format. - Actual dimension of the filter. - Array of dimension of at least nbDimsRequested that will be filled with - the filter parameters from the provided filter descriptor. - + - + index in beam - + + index in vector - + - For dispose + Multi-head attention modes set in attention descriptor - + - Dispose + multiple Q-s map to a single (K,V) set when beam size > 1 - + - For IDisposable + multiple Q-s map to multiple (K,V) sets when beam size > 1 - - + - Returns the inner handle. + no biases in attention input and output projections - + - This function initializes a previously created LRN descriptor object. + use biases in attention input and output projections - Normalization window width in elements. LRN layer uses a window - [center-lookBehind, center+lookAhead], where lookBehind = - floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1. So for n=10, - the window is [k-4...k...k+5] with a total of 10 samples. For - DivisiveNormalization layer the window has the same extents as above in - all 'spatial' dimensions (dimA[2], dimA[3], dimA[4]). By default lrnN is set - to 5 in cudnnCreateLRNDescriptor. - Value of the alpha variance scaling parameter in the normalization - formula. Inside the library code this value is divided by the - window width for LRN and by (window width)^#spatialDimensions - for DivisiveNormalization. By default this value is set to 1e-4 in - cudnnCreateLRNDescriptor. - Value of the beta power parameter in the normalization formula. By - default this value is set to 0.75 in cudnnCreateLRNDescriptor. - Value of the k parameter in normalization formula. By default this value is set to 2.0. - + - This function retrieves values stored in the previously initialized LRN descriptor object. + input projection weights for 'queries' - Pointers to receive values of parameters stored in the descriptor object. - See cudnnSetLRNDescriptor for more details. 
Any of these pointers can be - NULL (no value is returned for the corresponding parameter). - Pointers to receive values of parameters stored in the descriptor object. - See cudnnSetLRNDescriptor for more details. Any of these pointers can be - NULL (no value is returned for the corresponding parameter). - Pointers to receive values of parameters stored in the descriptor object. - See cudnnSetLRNDescriptor for more details. Any of these pointers can be - NULL (no value is returned for the corresponding parameter). - Pointers to receive values of parameters stored in the descriptor object. - See cudnnSetLRNDescriptor for more details. Any of these pointers can be - NULL (no value is returned for the corresponding parameter). - + - This function performs the forward LRN layer computation. + input projection weights for 'keys' - LRN layer mode of operation. Currently only - CUDNN_LRN_CROSS_CHANNEL_DIM1 is implemented. Normalization is - performed along the tensor's dimA[1]. - Pointer to scaling factors (in host memory) used to blend the layer output - value with prior value in the destination tensor as follows: dstValue = - alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section - for additional details. - Tensor descriptor objects for the input and output tensors. - Input tensor data pointer in device memory. - Pointer to scaling factors (in host memory) used to blend the layer output - value with prior value in the destination tensor as follows: dstValue = - alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section - for additional details. - Tensor descriptor objects for the input and output tensors. - Output tensor data pointer in device memory. - + - This function performs the forward LRN layer computation. + input projection weights for 'values' - LRN layer mode of operation. Currently only - CUDNN_LRN_CROSS_CHANNEL_DIM1 is implemented. Normalization is - performed along the tensor's dimA[1]. - Pointer to scaling factors (in host memory) used to blend the layer output - value with prior value in the destination tensor as follows: dstValue = - alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section - for additional details. - Tensor descriptor objects for the input and output tensors. - Input tensor data pointer in device memory. - Pointer to scaling factors (in host memory) used to blend the layer output - value with prior value in the destination tensor as follows: dstValue = - alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section - for additional details. - Tensor descriptor objects for the input and output tensors. - Output tensor data pointer in device memory. - + - This function performs the backward LRN layer computation. + output projection weights - LRN layer mode of operation. Currently only - CUDNN_LRN_CROSS_CHANNEL_DIM1 is implemented. Normalization is - performed along the tensor's dimA[1]. - Pointer to scaling factors (in host memory) used to blend the layer output - value with prior value in the destination tensor as follows: dstValue = - alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section - for additional details. - Tensor descriptor and pointer in device memory for the bottom layer's - data. (Bottom layer is the earlier layer in the computation graph during - inference). - Tensor descriptor and pointer in device memory for the bottom layer's - data. (Bottom layer is the earlier layer in the computation graph during - inference). 
- Tensor descriptor and pointer in device memory for the top layer's - cumulative loss differential data (error backpropagation). (Top layer is the - later layer in the computation graph during inference). - Tensor descriptor and pointer in device memory for the top layer's - cumulative loss differential data (error backpropagation). (Top layer is the - later layer in the computation graph during inference). - Tensor descriptor and pointer in device memory for the bottom layer's - data. (Bottom layer is the earlier layer in the computation graph - during inference). Note that these values are not modified during - backpropagation. - Tensor descriptor and pointer in device memory for the bottom layer's - data. (Bottom layer is the earlier layer in the computation graph - during inference). Note that these values are not modified during - backpropagation. - Pointer to scaling factors (in host memory) used to blend the layer output - value with prior value in the destination tensor as follows: dstValue = - alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section - for additional details. + + + + input projection bias tensor for 'queries' + + + + + input projection bias for 'keys' + + + + + input projection bias for 'values' + + + + + output projection biases + + + + + add partial gradients to wgrad output buffers + + + + + write partial gradients to wgrad output buffers + + + + + Input normalization mode for loss function + + + + + Input normalization mode for loss function + each op in [ ] can be disabled by passing NULL ptr + [per channel scale], [per channel bias], [activation], convolution, [generate BN stats] + + + + + [per channel scale], [per channel bias], [activation], convolutionBackwardWeights + + + + + utility for BN training in BN-conv fusion + computes the equivalent scale and bias from ySum ySqSum and learned scale, bias + optionally update running stats and generate saved stats + + + + + utility for BN inference in BN-conv fusion + computes the equivalent scale and bias from learned running stats and learned scale, bias + + + + + reserved for future use: convolution, [per channel scale], [per channel bias], [residual add], [activation] + + + + + reserved for future use: [per channel scale], [per channel bias], [residual add], activation, bitmask + + + + + reserved for future use + + + + + set XDESC: pass previously initialized cudnnTensorDescriptor_t + get XDESC: pass previously created cudnnTensorDescriptor_t + + + + + set/get XDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set/get BN_MODE: pass cudnnBatchNormMode_t* + + + + + set BN_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t + get BN_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t + + + + + set/get BN_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set/get BN_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set ACTIVATION_DESC: pass previously initialized cudnnActivationDescriptor_t + get ACTIVATION_DESC: pass previously created cudnnActivationDescriptor_t + + + + + set CONV_DESC: pass previously initialized cudnnConvolutionDescriptor_t + get CONV_DESC: pass previously created cudnnConvolutionDescriptor_t + + + + + set WDESC: pass previously initialized cudnnFilterDescriptor_t + get WDESC: pass previously created cudnnFilterDescriptor_t + + + + + set/get WDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set DWDESC: pass previously initialized cudnnFilterDescriptor_t + get DWDESC: 
pass previously created cudnnFilterDescriptor_t + + + + + set/get DWDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set YDESC: pass previously initialized cudnnTensorDescriptor_t + get YDESC: pass previously created cudnnTensorDescriptor_t + + + + + set/get YDATA_Placeholder: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set DYDESC: pass previously initialized cudnnTensorDescriptor_t + get DYDESC: pass previously created cudnnTensorDescriptor_t + + + + + set/get DYDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set YSTATS_DESC: pass previously initialized cudnnTensorDescriptor_t + get YSTATS_DESC: pass previously created cudnnTensorDescriptor_t + + + + + set/get YSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set/get YSQSUM_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set BN_SCALEBIAS_MEANVAR_DESC: pass previously initialized cudnnTensorDescriptor_t + get BN_SCALEBIAS_MEANVAR_DESC: pass previously created cudnnTensorDescriptor_t + + + + + set/get BN_SCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set/get BN_BIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set/get BN_SAVED_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set/get BN_SAVED_INVSTD_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set/get BN_RUNNING_MEAN_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set/get BN_RUNNING_VAR_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set ZDESC: pass previously initialized cudnnTensorDescriptor_t + get ZDESC: pass previously created cudnnTensorDescriptor_t + + + + + set/get ZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set BN_Z_EQSCALEBIAS_DESC: pass previously initialized cudnnTensorDescriptor_t + get BN_Z_EQSCALEBIAS_DESC: pass previously created cudnnTensorDescriptor_t + + + + + set/get BN_Z_EQSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set/get BN_Z_EQBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set ACTIVATION_BITMASK_DESC: pass previously initialized cudnnTensorDescriptor_t + get ACTIVATION_BITMASK_DESC: pass previously created cudnnTensorDescriptor_t + + + + + set/get ACTIVATION_BITMASK_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set DXDESC: pass previously initialized cudnnTensorDescriptor_t + get DXDESC: pass previously created cudnnTensorDescriptor_t + + + + + set/get DXDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set DZDESC: pass previously initialized cudnnTensorDescriptor_t + get DZDESC: pass previously created cudnnTensorDescriptor_t + + + + + set/get DZDATA_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set/get BN_DSCALE_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set/get BN_DBIAS_PLACEHOLDER: pass cudnnFusedOpsPointerPlaceHolder_t* + + + + + set: pass void* pointing to dev memory + get: pass void** pointing to host memory + + + + + set/get: pass size_t* pointing to host memory + + + + + set/get: pass int64_t* pointing to host memory + + + + + set/get: pass double* pointing to host memory + + + + + set/get: pass double* pointing to host memory + + + + + An opaque structure holding the + description of a generic n-D dataset. + + + + + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handle. + + + + + This function initializes a previously created dropout descriptor object. 
If states argument is equal to + NULL, random number generator states won't be initialized, and only dropout value will be set. No other + function should be writing to the memory + + The probability with which the value from input would be propagated through the dropout layer. + Pointer to user-allocated GPU memory that will hold random number generator states. + Specifies size in bytes of the provided memory for the states. + Seed used to initialize random number generator states. + + + + An opaque structure holding the description + of a filter dataset. + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handle. + + + + + This function initializes a previously created filter descriptor object into a 4D filter. + Filters layout must be contiguous in memory. + + Data type. + Enumerant holding the layout format. + Number of output feature maps. + Number of input feature maps. + Height of each filter. + Width of each filter. + + + + This function queries the parameters of the previouly initialized filter descriptor object. + + Data type. + Enumerant holding the layout format. + Number of output feature maps. + Number of input feature maps. + Height of each filter. + Width of each filter. + + + + This function initializes a previously created filter descriptor object. Filters layout must + be contiguous in memory. + + Data type. + Enumerant holding the layout format. + Dimension of the filter. + Array of dimension nbDims containing the size of the filter for each dimension. + + + + This function queries a previously initialized filter descriptor object. + + Dimension of the expected filter descriptor. It is also the minimum size of + the arrays filterDimA in order to be able to hold the results + Data type. + Enumerant holding the layout format. + Actual dimension of the filter. + Array of dimension of at least nbDimsRequested that will be filled with + the filter parameters from the provided filter descriptor. + + + + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handle. + + + + + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handle. + + + + + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handle. + + + + + + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handle. + + + + + This function initializes a previously created LRN descriptor object. + + Normalization window width in elements. LRN layer uses a window + [center-lookBehind, center+lookAhead], where lookBehind = + floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1. So for n=10, + the window is [k-4...k...k+5] with a total of 10 samples. For + DivisiveNormalization layer the window has the same extents as above in + all 'spatial' dimensions (dimA[2], dimA[3], dimA[4]). By default lrnN is set + to 5 in cudnnCreateLRNDescriptor. + Value of the alpha variance scaling parameter in the normalization + formula. Inside the library code this value is divided by the + window width for LRN and by (window width)^#spatialDimensions + for DivisiveNormalization. By default this value is set to 1e-4 in + cudnnCreateLRNDescriptor. + Value of the beta power parameter in the normalization formula. By + default this value is set to 0.75 in cudnnCreateLRNDescriptor. + Value of the k parameter in normalization formula. By default this value is set to 2.0. 
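The cudnnSetLRNDescriptor entry above defines the normalization window purely in terms of lrnN; a small illustration of that geometry using only the defaults quoted in the description (no wrapper call is made here):

static class LrnWindowSketch
{
    static void Main()
    {
        // Defaults quoted above: lrnN = 5, lrnAlpha = 1e-4, lrnBeta = 0.75, lrnK = 2.0.
        int lrnN = 5;

        int lookBehind = (lrnN - 1) / 2;        // floor((lrnN - 1) / 2) = 2
        int lookAhead = lrnN - lookBehind - 1;  // 2, so the window is [k - 2 .. k + 2]

        System.Console.WriteLine($"window = [k-{lookBehind} .. k+{lookAhead}]");

        // With lrnN = 10 the same formulas give lookBehind = 4 and lookAhead = 5,
        // i.e. the [k-4 .. k+5] window quoted in the description above.
    }
}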
+ + + + This function retrieves values stored in the previously initialized LRN descriptor object. + + Pointers to receive values of parameters stored in the descriptor object. + See cudnnSetLRNDescriptor for more details. Any of these pointers can be + NULL (no value is returned for the corresponding parameter). + Pointers to receive values of parameters stored in the descriptor object. + See cudnnSetLRNDescriptor for more details. Any of these pointers can be + NULL (no value is returned for the corresponding parameter). + Pointers to receive values of parameters stored in the descriptor object. + See cudnnSetLRNDescriptor for more details. Any of these pointers can be + NULL (no value is returned for the corresponding parameter). + Pointers to receive values of parameters stored in the descriptor object. + See cudnnSetLRNDescriptor for more details. Any of these pointers can be + NULL (no value is returned for the corresponding parameter). + + + + This function performs the forward LRN layer computation. + + LRN layer mode of operation. Currently only + CUDNN_LRN_CROSS_CHANNEL_DIM1 is implemented. Normalization is + performed along the tensor's dimA[1]. + Pointer to scaling factors (in host memory) used to blend the layer output + value with prior value in the destination tensor as follows: dstValue = + alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section + for additional details. + Tensor descriptor objects for the input and output tensors. + Input tensor data pointer in device memory. + Pointer to scaling factors (in host memory) used to blend the layer output + value with prior value in the destination tensor as follows: dstValue = + alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section + for additional details. + Tensor descriptor objects for the input and output tensors. + Output tensor data pointer in device memory. + + + + This function performs the forward LRN layer computation. + + LRN layer mode of operation. Currently only + CUDNN_LRN_CROSS_CHANNEL_DIM1 is implemented. Normalization is + performed along the tensor's dimA[1]. + Pointer to scaling factors (in host memory) used to blend the layer output + value with prior value in the destination tensor as follows: dstValue = + alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section + for additional details. + Tensor descriptor objects for the input and output tensors. + Input tensor data pointer in device memory. + Pointer to scaling factors (in host memory) used to blend the layer output + value with prior value in the destination tensor as follows: dstValue = + alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section + for additional details. + Tensor descriptor objects for the input and output tensors. + Output tensor data pointer in device memory. + + + + This function performs the backward LRN layer computation. + + LRN layer mode of operation. Currently only + CUDNN_LRN_CROSS_CHANNEL_DIM1 is implemented. Normalization is + performed along the tensor's dimA[1]. + Pointer to scaling factors (in host memory) used to blend the layer output + value with prior value in the destination tensor as follows: dstValue = + alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section + for additional details. + Tensor descriptor and pointer in device memory for the bottom layer's + data. (Bottom layer is the earlier layer in the computation graph during + inference). 
+ Tensor descriptor and pointer in device memory for the bottom layer's + data. (Bottom layer is the earlier layer in the computation graph during + inference). + Tensor descriptor and pointer in device memory for the top layer's + cumulative loss differential data (error backpropagation). (Top layer is the + later layer in the computation graph during inference). + Tensor descriptor and pointer in device memory for the top layer's + cumulative loss differential data (error backpropagation). (Top layer is the + later layer in the computation graph during inference). + Tensor descriptor and pointer in device memory for the bottom layer's + data. (Bottom layer is the earlier layer in the computation graph + during inference). Note that these values are not modified during + backpropagation. + Tensor descriptor and pointer in device memory for the bottom layer's + data. (Bottom layer is the earlier layer in the computation graph + during inference). Note that these values are not modified during + backpropagation. + Pointer to scaling factors (in host memory) used to blend the layer output + value with prior value in the destination tensor as follows: dstValue = + alpha[0]*resultValue + beta[0]*priorDstValue. Please refer to this section + for additional details. Tensor descriptor and pointer in device memory for the bottom layer's cumulative loss differential data (error backpropagation). (Bottom layer is the earlier layer in the computation graph during inference). @@ -6659,58 +7248,118 @@ Height of images in the output Width of images in the output - + + + ReduceTensorDescriptor is a pointer to an opaque structure + holding the description of a tensor reduction operation, used as a parameter to + cudnnReduceTensor(). cudnnCreateReduceTensorDescriptor() is used to create + one instance, and cudnnSetReduceTensorDescriptor() must be used to initialize this instance. + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handle. + + + + + + + + + + + + + + + + + + + + + + + - ReduceTensorDescriptor is a pointer to an opaque structure - holding the description of a tensor reduction operation, used as a parameter to - cudnnReduceTensor(). cudnnCreateReduceTensorDescriptor() is used to create - one instance, and cudnnSetReduceTensorDescriptor() must be used to initialize this instance. + - + - + For dispose - + Dispose - + For IDisposable - + Returns the inner handle. - + - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - - @@ -6775,7 +7424,7 @@ An array of tensor descriptors describing the input to each recurrent iteration. Minimum amount of GPU memory needed as reserve space to be able to train an RNN with the specified descriptor and input tensors. - + This function is used to query the amount of parameter space required to execute the RNN described by rnnDesc with inputs dimensions defined by xDesc. @@ -7373,11 +8022,156 @@ - + + + The math type specified in a given RNN descriptor. + + + + The math type specified in a given RNN descriptor. + + + The cudnnSetRNNProjectionLayers() function should be called after cudnnSetRNNDescriptor() to enable the "recurrent" and/or "output" projection in a recursive neural network + + The size of the LSTM cell output after the “recurrent” projection. This value should not be larger than hiddenSize programmed via cudnnSetRNNDescriptor(). + This parameter should be zero. + + + + This function retrieves the current RNN “projection” parameters. 
By default the projection feature is disabled so invoking this function immediately after cudnnSetRNNDescriptor() will yield recProjSize equal to hiddenSize and outProjSize set to zero. The cudnnSetRNNProjectionLayers() method enables the RNN projection. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + This function attempts all available cuDNN algorithms for cudnnRNNForwardInference, using user-allocated GPU memory, and outputs performance metrics to a user-allocated array of cudnnAlgorithmPerformance_t. These metrics are written in sorted fashion where the first element has the lowest compute time. + + + + + This function attempts all available cuDNN algorithms for cudnnRNNForwardTraining, using user-allocated GPU memory, and outputs performance metrics to a user-allocated array of cudnnAlgorithmPerformance_t. These metrics are written in sorted fashion where the first element has the lowest compute time. + + + + + + + + + + + + + + + This function attempts all available cuDNN algorithms for cudnnRNNForwardInference, using user-allocated GPU memory, and outputs performance metrics to a user-allocated array of cudnnAlgorithmPerformance_t. These metrics are written in sorted fashion where the first element has the lowest compute time. + + + + + This function attempts all available cuDNN algorithms for cudnnRNNForwardTraining, using user-allocated GPU memory, and outputs performance metrics to a user-allocated array of cudnnAlgorithmPerformance_t. These metrics are written in sorted fashion where the first element has the lowest compute time. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handle. + + An opaque structure holding the @@ -7413,13 +8207,86 @@ - This function destroys a previously created spatial transformer descriptor object. + Enumerant to specify the sampler type. Data type. Dimension of the transformed tensor. Array of dimension nbDims containing the size of the transformed tensor for every dimension. + + + This function generates a grid of coordinates in the input tensor corresponding to each pixel from the output tensor. + + Affine transformation matrix. It should be of size n*2*3 for a 2d transformation, where n is the number of images specified in stDesc. + A grid of coordinates. It is of size n*h*w*2 for a 2d transformation, where n, h, w is specified in stDesc. In the 4th dimension, the first coordinate is x, and the second coordinate is y. + + + + This function computes the gradient of a grid generation operation. + + Data pointer to GPU memory contains the input differential data. + Data pointer to GPU memory contains the output differential data. + + + + + This function performs a sampler operation and generates the output tensor using the grid given by the grid generator. + + Pointer to scaling factor (in host memory) used to blend the source value with prior value in the destination tensor as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. + Handle to the previously initialized input tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor xDesc. + A grid of coordinates generated by cudnnSpatialTfGridGeneratorForward. + Pointer to scaling factor (in host memory) used to blend the source value with prior value in the destination tensor as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. 
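The Find*AlgorithmEx entries above only guarantee that the returned performance array is sorted by compute time, so selecting an algorithm reduces to taking the first element. A hedged C# sketch of that selection step follows; AlgoPerf is a hypothetical stand-in for cudnnAlgorithmPerformance_t, since the exact wrapper signature is not reproduced in this file:

    using System.Linq;

    var results = new[]
    {
        new AlgoPerf { Algo = 2, TimeMs = 0.41f },
        new AlgoPerf { Algo = 0, TimeMs = 0.87f },
    };

    // The metrics come back already sorted by compute time, so results[0] is the
    // fastest; ordering again here merely documents the intent.
    var fastest = results.OrderBy(r => r.TimeMs).First();
    System.Console.WriteLine(fastest.Algo);   // 2

    class AlgoPerf            // hypothetical container, not the ManagedCuda type
    {
        public int Algo;      // enumerant identifying the algorithm
        public float TimeMs;  // measured execution time
    }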
+ Handle to the previously initialized output tensor descriptor. + Data pointer to GPU memory associated with the output tensor descriptor yDesc. + + + + This function performs a sampler operation and generates the output tensor using the grid given by the grid generator. + + Pointer to scaling factor (in host memory) used to blend the source value with prior value in the destination tensor as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. + Handle to the previously initialized input tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor xDesc. + A grid of coordinates generated by cudnnSpatialTfGridGeneratorForward. + Pointer to scaling factor (in host memory) used to blend the source value with prior value in the destination tensor as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. + Handle to the previously initialized output tensor descriptor. + Data pointer to GPU memory associated with the output tensor descriptor yDesc. + + + + This function computes the gradient of a sampling operation. + + Pointer to scaling factor (in host memory) used to blend the source value with prior value in the destination tensor as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. + Handle to the previously initialized input tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor xDesc. + Pointer to scaling factor (in host memory) used to blend the source value with prior value in the destination tensor as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. + Handle to the previously initialized output differential tensor descriptor. + Data pointer to GPU memory associated with the output tensor descriptor dxDesc. + Pointer to scaling factor (in host memory) used to blend the gradient outputs dgrid with prior value in the destination pointer as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. + Handle to the previously initialized input differential tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor dyDesc. + A grid of coordinates generated by cudnnSpatialTfGridGeneratorForward. + Pointer to scaling factor (in host memory) used to blend the gradient outputs dgrid with prior value in the destination pointer as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. + Data pointer to GPU memory contains the output differential data. + + + + This function computes the gradient of a sampling operation. + + Pointer to scaling factor (in host memory) used to blend the source value with prior value in the destination tensor as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. + Handle to the previously initialized input tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor xDesc. + Pointer to scaling factor (in host memory) used to blend the source value with prior value in the destination tensor as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. + Handle to the previously initialized output differential tensor descriptor. + Data pointer to GPU memory associated with the output tensor descriptor dxDesc. + Pointer to scaling factor (in host memory) used to blend the gradient outputs dgrid with prior value in the destination pointer as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. + Handle to the previously initialized input differential tensor descriptor. + Data pointer to GPU memory associated with the tensor descriptor dyDesc. 
+ A grid of coordinates generated by cudnnSpatialTfGridGeneratorForward. + Pointer to scaling factor (in host memory) used to blend the gradient outputs dgrid with prior value in the destination pointer as follows: dstValue = alpha[0]*srcValue + beta[0]*priorDstValue. + Data pointer to GPU memory contains the output differential data. + An opaque structure holding the @@ -7537,5 +8404,46 @@ Array of dimension of at least nbDimsRequested that will be filled with the strides from the provided tensor descriptor. + + + An opaque structure holding the + description of a generic n-D dataset. + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Returns the inner handle. + + + + + Initialize a previously created tensor transform descriptor. + + + + + Retrieves the values stored in a previously initialized tensor transform descriptor. + + diff --git a/src/external/ManagedCuda/CudaDNN.dll b/src/external/ManagedCuda/CudaDNN.dll index 8d4a59a4..0c11622b 100644 Binary files a/src/external/ManagedCuda/CudaDNN.dll and b/src/external/ManagedCuda/CudaDNN.dll differ diff --git a/src/external/ManagedCuda/ManagedCuda.dll b/src/external/ManagedCuda/ManagedCuda.dll index f3bb24a8..98fab494 100644 Binary files a/src/external/ManagedCuda/ManagedCuda.dll and b/src/external/ManagedCuda/ManagedCuda.dll differ diff --git a/src/external/ManagedCuda/ManagedCuda.xml b/src/external/ManagedCuda/ManagedCuda.xml index d215ac17..bcd3017c 100644 --- a/src/external/ManagedCuda/ManagedCuda.xml +++ b/src/external/ManagedCuda/ManagedCuda.xml @@ -4,13410 +4,12191 @@ ManagedCuda - + - CUDA array + Flags to register a graphics resource - + - + Specifies no hints about how this resource will be used. + It is therefore assumed that this resource will be read + from and written to by CUDA. This is the default value. - + - CUDA linker + Specifies that CUDA will not write to this resource. - + - + Specifies that CUDA will not read from this resource and + will write over the entire contents of the resource, so + none of the data previously stored in the resource will + be preserved. - + - CUDA mipmapped array + Specifies that CUDA will bind this resource to a surface reference. - + - + - Cuda context + Flags for mapping and unmapping graphics interop resources - + - + Specifies no hints about how this resource will be used. + It is therefore assumed that this resource will be read from and written to by CUDA. This is the default value. - + - Cuda device + Specifies that CUDA will not write to this resource. - + - + Specifies that CUDA will not read from + this resource and will write over the entire contents of the resource, so none of the data previously stored in the + resource will be preserved. - + - Device that represents the CPU + CUTexRefSetFlags - + - Device that represents an invalid device + - + - Pointer to CUDA device memory + Read the texture as integers rather than promoting the values to floats in the range [0,1]. + Flag for - + - + Use normalized texture coordinates in the range [0,1) instead of [0,dim). + Flag for - + - + Perform sRGB -> linear conversion during texture read. - - - + - + Disable any trilinear filtering optimizations. - - - + - + CUDA driver API initialization flags - - - - + - + Currently no initialization flags are defined. - - - - + - + CUDA driver API Context Enable Peer Access flags - - - - + - + Currently no flags are defined. - - - - + - Returns true if both objects are of type CUdeviceptr and if both Pointer member is equal. 
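The spatial transformer grid generator and sampler entries above fix the buffer shapes for the 2D case: theta is of size n*2*3 and the generated grid holds n*h*w*2 values with (x, y) packed in the innermost dimension. A short C# sketch of that bookkeeping (no CudaDNN calls are made):

    // Batch of 16 images sampled to 64x64: thetaElems = 96, gridElems = 131072.
    var (thetaElems, gridElems) = SpatialTfSizes(16, 64, 64);
    System.Console.WriteLine($"{thetaElems} {gridElems}");

    static (long thetaElems, long gridElems) SpatialTfSizes(int n, int h, int w)
    {
        long theta = (long)n * 2 * 3;        // one 2x3 affine matrix per image
        long grid  = (long)n * h * w * 2;    // one (x, y) pair per output pixel
        return (theta, grid);
    }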
+ CUDA stream flags - - - + - Overrides object.GetHashCode() + For compatibilty with pre Cuda 5.0, equal to Default - - + - override ToString() + Default stream flag - - + - + Stream does not synchronize with stream 0 (the NULL stream) - - + - The on which a pointer was allocated or registered + CudaCooperativeLaunchMultiDeviceFlags - + - The describing the physical location of a pointer + No flags - + - The address at which a pointer's memory may be accessed on the device - Except in the exceptional disjoint addressing cases, the value returned will equal the input value. + If set, each kernel launched as part of ::cuLaunchCooperativeKernelMultiDevice only + waits for prior work in the stream corresponding to that GPU to complete before the + kernel begins execution. - + - The address at which a pointer's memory may be accessed on the host + If set, any subsequent work pushed in a stream that participated in a call to + ::cuLaunchCooperativeKernelMultiDevice will only wait for the kernel launched on + the GPU corresponding to that stream to complete before it begins execution. - + - A pair of tokens for use with the nv-p2p.h Linux kernel interface + CUDAArray3DFlags - + - Synchronize every synchronous memory operation initiated on this region + No flags - + - A process-wide unique ID for an allocated memory region + if set, the CUDA array contains an array of 2D slices and + the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the + number of slices, not the depth of a 3D array. - + - Indicates if the pointer points to managed memory + if set, the CUDA array contains an array of layers where each layer is either a 1D + or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number + of layers, not the depth of a 3D array. - + - Cuda event + this flag must be set in order to bind a surface reference + to the CUDA array - + - + If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The + width of such a CUDA array must be equal to its height, and Depth must be six. + If ::CUDA_ARRAY3D_LAYERED flag is also set, then the CUDA array is a collection of cubemaps + and Depth must be a multiple of six. - + - Cuda function / kernel + This flag must be set in order to perform texture gather operations on a CUDA array. - + - + This flag if set indicates that the CUDA array is a DEPTH_TEXTURE. - + - Cuda module + This flag indicates that the CUDA array may be bound as a color target in an external graphics API - + - + This flag if set indicates that the CUDA array or CUDA mipmapped array + is a sparse CUDA array or CUDA mipmapped array respectively - + - Cuda stream + CUMemHostAllocFlags. All of these flags are orthogonal to one another: a developer may allocate memory that is portable, mapped and/or + write-combined with no restrictions. - + - + No flags - + - Returns the CUDA NULL stream (0) + The memory returned by this call will be considered as pinned memory + by all CUDA contexts, not just the one that performed the allocation. - + - Stream handle that can be passed as a CUstream to use an implicit stream - with legacy synchronization behavior. + Maps the allocation into the CUDA address space. The device pointer + to the memory may be obtained by calling . This feature is available only on + GPUs with compute capability greater than or equal to 1.1. - + - Stream handle that can be passed as a CUstream to use an implicit stream - with per-thread synchronization behavior. + Allocates the memory as write-combined (WC). 
WC memory + can be transferred across the PCI Express bus more quickly on some system configurations, but cannot be read + efficiently by most CPUs. WC memory is a good option for buffers that will be written by the CPU and read by + the GPU via mapped pinned memory or host->device transfers. + If set, host memory is allocated as write-combined - fast to write, + faster to DMA, slow to read except via SSE4 streaming load instruction + (MOVNTDQA). - + - CUDA texture reference + Context creation flags. + The two LSBs of the flags parameter can be used to control how the OS thread, which owns the CUDA context at + the time of an API call, interacts with the OS scheduler when waiting for results from the GPU. - + - + The default value if the flags parameter is zero, uses a heuristic based on the + number of active CUDA contexts in the process C and the number of logical processors in the system P. If C > + P, then CUDA will yield to other OS threads when waiting for the GPU, otherwise CUDA will not yield while + waiting for results and actively spin on the processor. - + - CUDA surface reference + Instruct CUDA to actively spin when waiting for results from the GPU. This can decrease + latency when waiting for the GPU, but may lower the performance of CPU threads if they are performing + work in parallel with the CUDA thread. - + - + Instruct CUDA to yield its thread when waiting for results from the GPU. This can + increase latency when waiting for the GPU, but can increase the performance of CPU threads performing work + in parallel with the GPU. - + - CUDA graphics interop resource (DirectX / OpenGL) + Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work. - + - + No description found... - + - CUDA texture object + Instruct CUDA to support mapped pinned allocations. This flag must be set in order to allocate pinned host memory that is accessible to the GPU. - + - + Instruct CUDA to not reduce local memory after resizing local memory + for a kernel. This can prevent thrashing by local memory allocations when launching many kernels with high + local memory usage at the cost of potentially increased memory usage. - + - CUDA surface object + No description found... - + - + CUMemHostRegisterFlags. All of these flags are orthogonal to one another: a developer may allocate memory that is portable or mapped + with no restrictions. - + - CUDA definition of UUID + No flags - + - + The memory returned by this call will be considered as pinned memory + by all CUDA contexts, not just the one that performed the allocation. - + - 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms + Maps the allocation into the CUDA address space. The device pointer + to the memory may be obtained by calling . This feature is available only on + GPUs with compute capability greater than or equal to 1.1. - + - + If set, the passed memory pointer is treated as pointing to some + memory-mapped I/O space, e.g. belonging to a third-party PCIe device. + On Windows the flag is a no-op. + On Linux that memory is marked as non cache-coherent for the GPU and + is expected to be physically contiguous. + On all other platforms, it is not supported and CUDA_ERROR_INVALID_VALUE + is returned. - + - Interprocess Handle for Events + If set, the passed memory pointer is treated as pointing to memory that is + considered read-only by the device. 
On platforms without + CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES, this flag is + required in order to register memory mapped to the CPU as read-only. Support + for the use of this flag can be queried from the device attribute + CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED. Using this flag with + a current context associated with a device that does not have this attribute + set will cause ::cuMemHostRegister to error with CUDA_ERROR_NOT_SUPPORTED. - + - + Indicates that the layered sparse CUDA array or CUDA mipmapped array has a single mip tail region for all layers - + - Interprocess Handle for Memory + No flags - + - + Indicates that the layered sparse CUDA array or CUDA mipmapped array has a single mip tail region for all layers - + - half precission floating point + Flag for cuStreamAddCallback() - + - two half precission floating point (x,y) + No flags - + - CUDA external memory + Event creation flags - + - + Default event creation flag. - + - CUDA external semaphore + Specifies that event should use blocking synchronization. A CPU thread + that uses to wait on an event created with this flag will block until the event has actually + been recorded. - + - + Event will not record timing data - + - CUDA graph + Event is suitable for interprocess use. CUEventFlags.DisableTiming must be set - + - + Event record flags - + - CUDA graph node + Default event record flag - + - + When using stream capture, create an event record node + instead of the default behavior. This flag is invalid + when used outside of capture. - + - Returns the type of the Node + Event wait flags - + - Sets the parameters of host node nodeParams. + Default event wait flag - - + - Sets the parameters of kernel node nodeParams. + When using stream capture, create an event wait node + instead of the default behavior. This flag is invalid + when used outside of capture. - - + - Sets the parameters of memcpy node nodeParams. + Flags for ::cuStreamWaitValue32 - - + - Sets the parameters of memset node nodeParams. + Wait until (int32_t)(*addr - value) >= 0 (or int64_t for 64 bit values). Note this is a cyclic comparison which ignores wraparound. (Default behavior.) - - + - Gets the parameters of host node. + Wait until *addr == value. - - + - Gets the parameters of kernel node. + Wait until (*addr & value) != 0. - - + - Gets the parameters of memcpy node. + Wait until ~(*addr | value) != 0. Support for this operation can be + queried with ::cuDeviceGetAttribute() and ::CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR. + Generally, this requires compute capability 7.0 or greater. - - + - Gets the parameters of memset node. + Follow the wait operation with a flush of outstanding remote writes. This + means that, if a remote write operation is guaranteed to have reached the + device before the wait can be satisfied, that write is guaranteed to be + visible to downstream device work. The device is permitted to reorder + remote writes internally. For example, this flag would be required if + two remote writes arrive in a defined order, the wait is satisfied by the + second write, and downstream work needs to observe the first write. - - + - Only for ChildGraphNodes + Flags for ::cuStreamWriteValue32 - - + - Returns a node's dependencies. + Default behavior - - + - Returns a node's dependent nodes + Permits the write to be reordered with writes which were issued + before it, as a performance optimization. 
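The ::cuStreamWaitValue32 flags above define four predicates that the device evaluates against *addr. The C# sketch below restates them on the host side, in particular the cyclic GEQ comparison that deliberately ignores wraparound (illustrative only, not library code):

    // Cyclic GEQ is satisfied for addr = 2, value = 0xFFFFFFFE:
    // (int)(2u - 0xFFFFFFFEu) == 4 >= 0, even though 2 < 0xFFFFFFFE as unsigned.
    bool geq     = WaitGeq(2u, 0xFFFFFFFEu);      // true
    bool eq      = WaitEq(7u, 7u);                // true
    bool anyBit  = WaitAnd(0b0110u, 0b0010u);     // true: a masked bit is set
    bool noneSet = WaitNor(0xFFFFFFFFu, 0u);      // false: all bits already set

    static bool WaitGeq(uint addr, uint value) => (int)(addr - value) >= 0;  // wraparound-safe
    static bool WaitEq (uint addr, uint value) => addr == value;
    static bool WaitAnd(uint addr, uint value) => (addr & value) != 0;
    static bool WaitNor(uint addr, uint value) => ~(addr | value) != 0;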
Normally, ::cuStreamWriteValue32 will provide a memory fence before the + write, which has similar semantics to __threadfence_system() but is scoped to the stream rather than a CUDA thread. - + - CUDA executable graph + Indicates that the external memory object is a dedicated resource - + - + No flags - + - Legacy device properties + Indicates that the external memory object is a dedicated resource - + - Maximum number of threads per block + parameter of ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS - + - Maximum size of each dimension of a block + When the /p flags parameter of ::CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS + contains this flag, it indicates that signaling an external semaphore object + should skip performing appropriate memory synchronization operations over all + the external memory objects that are imported as ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF, + which otherwise are performed by default to ensure data coherency with other + importers of the same NvSciBuf memory objects. - + - Maximum size of each dimension of a grid + When the /p flags parameter of ::CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS + contains this flag, it indicates that waiting on an external semaphore object + should skip performing appropriate memory synchronization operations over all + the external memory objects that are imported as ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF, + which otherwise are performed by default to ensure data coherency with other + importers of the same NvSciBuf memory objects. - + - Shared memory available per block in bytes + flags of ::cuDeviceGetNvSciSyncAttributes - + - Constant memory available on device in bytes + When /p flags of ::cuDeviceGetNvSciSyncAttributes is set to this, + it indicates that application needs signaler specific NvSciSyncAttr + to be filled by ::cuDeviceGetNvSciSyncAttributes. - + - Warp size in threads. Also called SIMD width. + When /p flags of ::cuDeviceGetNvSciSyncAttributes is set to this, + it indicates that application needs waiter specific NvSciSyncAttr + to be filled by ::cuDeviceGetNvSciSyncAttributes. - + - Maximum pitch in bytes allowed by the memory copy functions that involve memory regions allocated through - . + Flags for specifying particular handle types - + - 32-bit registers available per block + Does not allow any export mechanism. - + - Clock frequency in kilohertz + Allows a file descriptor to be used for exporting. Permitted only on POSIX systems. (int) - + - Alignment requirement for textures. texture base addresses that are aligned to textureAlign bytes do not - need an offset applied to texture fetches. + Allows a Win32 NT handle to be used for exporting. (HANDLE) - + - 2D memory copy parameters + Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE) - + - Source X in bytes + Specifies the memory protection flags for mapping. - + - Source Y + Default, make the address range not accessible - + - Source memory type (host, device, array) + Make the address range read accessible - + - Source host pointer + Make the address range read-write accessible - + - Source device pointer + Flag for requesting different optimal and required granularities for an allocation. 
- + - Source array reference + Minimum required granularity for allocation - + - Source pitch (ignored when src is array) + Recommended granularity for allocation for best performance - + - Destination X in bytes + Bitmasks for ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS - - - Destination Y - + + - + - Destination memory type (host, device, array) + ::cuFlushGPUDirectRDMAWrites() and its CUDA Runtime API counterpart are supported on the device. - + - Destination host pointer + The ::CU_STREAM_WAIT_VALUE_FLUSH flag and the ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. - + - Destination device pointer + The additional write options for ::cuGraphDebugDotPrint - - - Destination array reference - + + - + - Destination pitch (ignored when dst is array) + Output all debug data as if every debug flag is enabled - + - Width of 2D memory copy in bytes + Use CUDA Runtime structures for output - + - Height of 2D memory copy + Adds CUDA_KERNEL_NODE_PARAMS values to output - + - 3D memory copy parameters + Adds CUDA_MEMCPY3D values to output - + - Source X in bytes + Adds CUDA_MEMSET_NODE_PARAMS values to output - + - Source Y + Adds CUDA_HOST_NODE_PARAMS values to output - + - Source Z + Adds CUevent handle from record and wait nodes to output - + - Source LOD + Adds CUDA_EXT_SEM_SIGNAL_NODE_PARAMS values to output - + - Source memory type (host, device, array) + Adds CUDA_EXT_SEM_WAIT_NODE_PARAMS values to output - + - Source host pointer + Adds CUkernelNodeAttrValue values to output - + - Source device pointer + Adds node handles and every kernel function handle to output - + - Source array reference + Adds memory alloc node parameters to output - + - Must be NULL + Adds memory free node parameters to output - + - Source pitch (ignored when src is array) + Flags for user objects for graphs - + + + + - Source height (ignored when src is array; may be 0 if Depth==1) + Indicates the destructor execution is not synchronized by any CUDA handle. - + - Destination X in bytes + Flags for retaining user object references for graphs - + + + + - Destination Y + Transfer references from the caller rather than creating new references. - + - Destination Z + Flags for instantiating a graph - + + + + - Destination LOD + Automatically free memory allocated in a graph before relaunching. - + - Destination memory type (host, device, array) + CUDA stream callback + The stream the callback was added to, as passed to ::cuStreamAddCallback. May be NULL. + CUDA_SUCCESS or any persistent error on the stream. + User parameter provided at registration. - + - Destination host pointer + Block size to per-block dynamic shared memory mapping for a certain + kernel. 
+ e.g.: + If no dynamic shared memory is used: x => 0 + If 4 bytes shared memory per thread is used: x = 4 * x + block size + The dynamic shared memory needed by a block - + - Destination device pointer + CUDA host function + Argument value passed to the function - + - Destination array reference + Texture reference addressing modes - + - Must be NULL + Wrapping address mode - + - Destination pitch (ignored when dst is array) + Clamp to edge address mode - + - Destination height (ignored when dst is array; may be 0 if Depth==1) + Mirror address mode - + - Width of 3D memory copy in bytes + Border address mode - + - Height of 3D memory copy + Array formats - + - Depth of 3D memory copy + Unsigned 8-bit integers - + - 3D memory copy parameters + Unsigned 16-bit integers - + - Source X in bytes + Unsigned 32-bit integers - + - Source Y + Signed 8-bit integers - + - Source Z + Signed 16-bit integers - + - Source LOD + Signed 32-bit integers - + - Source memory type (host, device, array) + 16-bit floating point - + - Source host pointer + 32-bit floating point - + - Source device pointer + 8-bit YUV planar format, with 4:2:0 sampling - + - Source array reference + Compute mode that device is currently in. - + - Source context (ignored with srcMemoryType is array) + Default mode - Device is not restricted and can have multiple CUDA + contexts present at a single time. - + - Source pitch (ignored when src is array) + Compute-prohibited mode - Device is prohibited from creating + new CUDA contexts. - + - Source height (ignored when src is array; may be 0 if Depth==1) + Compute-exclusive-process mode (Only one context used by a + single process can be present on this device at a time) - + - Destination X in bytes + Memory advise values - + - Destination Y + Data will mostly be read and only occassionally be written to - + - Destination Z + Undo the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY - + - Destination LOD + Set the preferred location for the data as the specified device - + - Destination memory type (host, device, array) + Clear the preferred location for the data - + - Destination host pointer + Data will be accessed by the specified device, so prevent page faults as much as possible - + - Destination device pointer + Let the Unified Memory subsystem decide on the page faulting policy for the specified device - + - Destination array reference + Context Attach flags - + - Destination context (ignored with dstMemoryType is array) + None - + - Destination pitch (ignored when dst is array) + Device properties - + - Destination height (ignored when dst is array; may be 0 if Depth==1) + Maximum number of threads per block - + - Width of 3D memory copy in bytes + Maximum block dimension X - + - Height of 3D memory copy + Maximum block dimension Y - + - Depth of 3D memory copy + Maximum block dimension Z - + - Array descriptor + Maximum grid dimension X - + - Width of array + Maximum grid dimension Y - + - Height of array + Maximum grid dimension Z - + - Array format + Maximum amount of shared memory + available to a thread block in bytes; this amount is shared by all thread blocks simultaneously resident on a + multiprocessor - + - Channels per array element + Deprecated, use MaxSharedMemoryPerBlock - + - 3D array descriptor + Memory available on device for __constant__ variables in a CUDA C kernel in bytes - + - Width of 3D array + Warp size in threads - + - Height of 3D array + Maximum pitch in bytes allowed by the memory copy functions + that involve memory regions allocated through - + - Depth 
of 3D array + Deprecated, use MaxRegistersPerBlock - + - Array format + Maximum number of 32-bit registers available + to a thread block; this number is shared by all thread blocks simultaneously resident on a multiprocessor - + - Channels per array element + Typical clock frequency in kilohertz - + - Flags + Alignment requirement; texture base addresses + aligned to textureAlign bytes do not need an offset applied to texture fetches - + - Idea of a SizeT type from http://blogs.hoopoe-cloud.com/index.php/tag/cudanet/, entry from Tuesday, September 15th, 2009 + 1 if the device can concurrently copy memory between host + and device while executing a kernel, or 0 if not - + - + Number of multiprocessors on device - - + - + Specifies whether there is a run time limit on kernels. + 1 if there is a run time limit for kernels executed on the device, or 0 if not - - + - + Device is integrated with host memory. 1 if the device is integrated with the memory subsystem, or 0 if not - - + - + Device can map host memory into CUDA address space. 1 if the device can map host memory into the + CUDA address space, or 0 if not - - + - + Compute mode (See for details) - - + - + Maximum 1D texture width - - + - + Maximum 2D texture width - - - + - + Maximum 2D texture height - - - + - + Maximum 3D texture width - - - + - + Maximum 3D texture height - - - + - + Maximum 3D texture depth - - - + - + Maximum texture array width - - - + - + Maximum texture array height - - - + - + Maximum slices in a texture array - - - + - + Alignment requirement for surfaces - - - + - + Device can possibly execute multiple kernels concurrently. + 1 if the device supports executing multiple kernels + within the same context simultaneously, or 0 if not. It is not guaranteed that multiple kernels will be resident on + the device concurrently so this feature should not be relied upon for correctness. - - - + - + Device has ECC support enabled. 1 if error correction is enabled on the device, 0 if error correction + is disabled or not supported by the device. 
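Putting the 3D array descriptor fields above together, the storage a descriptor implies is simply width * height * depth * channels * component size, where Depth counts layers for layered arrays. A C# sketch of that bookkeeping; the 4-byte component size is taken from the 32-bit float entry in the CUArrayFormat list and is hard-coded here only for the example:

    using System;

    // 512 x 512 x 4-layer array of 4-channel float texels:
    // 512*512*4 elements * 4 channels * 4 bytes = 16 MiB.
    long bytes = ArraySizeInBytes(512, 512, 4, numChannels: 4, bytesPerComponent: 4);
    Console.WriteLine(bytes);   // 16777216

    static long ArraySizeInBytes(long width, long height, long depth,
                                 int numChannels, int bytesPerComponent)
    {
        // For layered arrays the Depth field counts layers rather than a third
        // spatial dimension, but the element count is computed the same way.
        long elements = width * Math.Max(height, 1L) * Math.Max(depth, 1L) * numChannels;
        return elements * bytesPerComponent;
    }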
- - - + - + PCI bus ID of the device - - - + - + PCI device ID of the device - - - - + - + Device is using TCC driver model - - - - + - Define operator + on converted to ulong values to avoid fall back to int + Peak memory clock frequency in kilohertz - - - - + - Define operator + on converted to ulong values to avoid fall back to int + Global memory bus width in bits - - - - + - Define operator + on converted to ulong values to avoid fall back to int + Size of L2 cache in bytes - - - - + - Define operator + on converted to ulong values to avoid fall back to int + Maximum resident threads per multiprocessor - - - - + - Define operator + on converted to ulong values to avoid fall back to int + Number of asynchronous engines - - - - + - Define operator - on converted to ulong values to avoid fall back to int + Device shares a unified address space with the host - - - - + - Define operator - on converted to ulong values to avoid fall back to int + Maximum 1D layered texture width - - - - + - Define operator - on converted to ulong values to avoid fall back to int + Maximum layers in a 1D layered texture - - - - + - Define operator - on converted to ulong values to avoid fall back to int + PCI domain ID of the device - - - - + - Define operator - on converted to ulong values to avoid fall back to int + Pitch alignment requirement for textures - - - - + - Define operator * on converted to ulong values to avoid fall back to int + Maximum cubemap texture width/height - - - - + - Define operator * on converted to ulong values to avoid fall back to int + Maximum cubemap layered texture width/height - - - - + - Define operator * on converted to ulong values to avoid fall back to int + Maximum layers in a cubemap layered texture - - - - + - Define operator * on converted to ulong values to avoid fall back to int + Maximum 1D surface width - - - - + - Define operator * on converted to ulong values to avoid fall back to int + Maximum 2D surface width - - - - + - Define operator / on converted to ulong values to avoid fall back to int + Maximum 2D surface height - - - - + - Define operator / on converted to ulong values to avoid fall back to int + Maximum 3D surface width - - - - + - Define operator / on converted to ulong values to avoid fall back to int + Maximum 3D surface height - - - - + - Define operator / on converted to ulong values to avoid fall back to int + Maximum 3D surface depth - - - - + - Define operator / on converted to ulong values to avoid fall back to int + Maximum 1D layered surface width - - - - + - Define operator > on converted to ulong values to avoid fall back to int + Maximum layers in a 1D layered surface - - - - + - Define operator > on converted to ulong values to avoid fall back to int + Maximum 2D layered surface width - - - - + - Define operator > on converted to ulong values to avoid fall back to int + Maximum 2D layered surface height - - - - + - Define operator > on converted to ulong values to avoid fall back to int + Maximum layers in a 2D layered surface - - - - + - Define operator > on converted to ulong values to avoid fall back to int + Maximum cubemap surface width - - - - + - Define operator < on converted to ulong values to avoid fall back to int + Maximum cubemap layered surface width - - - - + - Define operator < on converted to ulong values to avoid fall back to int + Maximum layers in a cubemap layered surface - - - - + - Define operator < on converted to ulong values to avoid fall back to int + Maximum 1D linear texture width - - - - + - Define operator 
< on converted to ulong values to avoid fall back to int + Maximum 2D linear texture width - - - - + - Define operator < on converted to ulong values to avoid fall back to int + Maximum 2D linear texture height - - - - + - + Maximum 2D linear texture pitch in bytes - - - + - returns this.value.ToString() + Maximum mipmapped 2D texture width - - + - Returns this.value.GetHashCode() + Maximum mipmapped 2D texture height - - + - Inner struct for CudaResourceDesc + Major compute capability version number - + - Device pointer + Minor compute capability version number - + - Array format + Maximum mipmapped 1D texture width - + - Channels per array element + Device supports stream priorities - + - Size in bytes + Device supports caching globals in L1 - + - Inner struct for CudaResourceDesc + Device supports caching locals in L1 - + - Device pointer + Maximum shared memory available per multiprocessor in bytes - + - Array format + Maximum number of 32-bit registers available per multiprocessor - + - Channels per array element + Device can allocate managed memory on this system - + - Width of the array in elements + Device is on a multi-GPU board - + - Height of the array in elements + Unique id for a group of devices on the same multi-GPU board - + - Pitch between two rows in bytes + Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware) - + - Mimics the union "CUDA_RESOURCE_DESC.res" in cuda.h + Ratio of single precision performance (in floating-point operations per second) to double precision performance - + - CUDA array + Device supports coherently accessing pageable memory without calling cudaHostRegister on it - + - CUDA mipmapped array + Device can coherently access managed memory concurrently with the CPU - + - Linear memory + Device supports compute preemption. - + - Linear pitched 2D memory + Device can access host registered memory at the same virtual address as the CPU. - + - CUDA Resource descriptor + ::cuStreamBatchMemOp and related APIs are supported. - + - + 64-bit operations are supported in ::cuStreamBatchMemOp and related APIs. - - + - + ::CU_STREAM_WAIT_VALUE_NOR is supported. - - + - + Device supports launching cooperative kernels via ::cuLaunchCooperativeKernel - - + - + Device can participate in cooperative kernels launched via ::cuLaunchCooperativeKernelMultiDevice - - + - + Maximum optin shared memory per block - - + - + Both the ::CU_STREAM_WAIT_VALUE_FLUSH flag and the ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See \ref CUDA_MEMOP for additional details. - - + - + Device supports host memory registration via ::cudaHostRegister. - - + - + Device accesses pageable memory via the host's page tables. - - + - + The host can directly access managed memory on the device without migration. 
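The point of the SizeT operator overloads listed above is that size arithmetic is carried out on ulong values rather than falling back to int. A short sketch, assuming SizeT lives in ManagedCuda.BasicTypes and converts implicitly from int (both details are assumptions about the wrapper, not shown in this file):

    using System;
    using ManagedCuda.BasicTypes;   // assumed home of SizeT; adjust to the actual namespace

    SizeT width  = 32768;
    SizeT height = 32768;
    SizeT bytes  = width * height * sizeof(float);   // stays in ulong arithmetic
    Console.WriteLine((ulong)bytes);                 // 4294967296, would overflow a plain int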
- - + - + Deprecated, Use VirtualMemoryManagementSupported - - + - + Device supports virtual memory management APIs like ::cuMemAddressReserve, ::cuMemCreate, ::cuMemMap and related APIs - - + - + Device supports exporting memory to a posix file descriptor with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate - - + - + Device supports exporting memory to a Win32 NT handle with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate - - + - + Device supports exporting memory to a Win32 KMT handle with ::cuMemExportToShareableHandle, if requested ::cuMemCreate - - + - + Maximum number of blocks per multiprocessor - - + - + Device supports compression of memory - - + - + Device's maximum L2 persisting lines capacity setting in bytes - - + - + The maximum value of CUaccessPolicyWindow::num_bytes. - - + - + Device supports specifying the GPUDirect RDMA flag with ::cuMemCreate - - + - + Shared memory reserved by CUDA driver per block in bytes - - + - + Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays - - + - + Device supports using the ::cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU - - + - + External timeline semaphore interop is supported on the device - - + - + Device supports using the ::cuMemAllocAsync and ::cuMemPool family of APIs - - + - + Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information) - - + - + The returned attribute shall be interpreted as a bitmask, where the individual bits are described by the ::CUflushGPUDirectRDMAWritesOptions enum - - + - + GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See ::CUGPUDirectRDMAWritesOrdering for the numerical values returned here. - - + - + Handle types supported with mempool based IPC - - + - + Max elems... - - + - + Texture reference filtering modes - - + - + Point filter mode - - + - + Linear filter mode - - + - + Function properties - - + - + The number of threads beyond which a launch of the function would fail. + This number depends on both the function and the device on which the + function is currently loaded. - - + - + The size in bytes of statically-allocated shared memory required by + this function. This does not include dynamically-allocated shared + memory requested by the user at runtime. - - + - + The size in bytes of statically-allocated shared memory required by + this function. This does not include dynamically-allocated shared + memory requested by the user at runtime. - - + - + The size in bytes of thread local memory used by this function. - - + - + The number of registers used by each thread of this function. - - + - + The PTX virtual architecture version for which the function was + compiled. This value is the major PTX version * 10 + the minor PTX version, so a PTX version 1.3 function + would return the value 13. Note that this may return the undefined value of 0 for cubins compiled prior to CUDA + 3.0. - - + - + The binary version for which the function was compiled. This + value is the major binary version * 10 + the minor binary version, so a binary version 1.3 function would return + the value 13. Note that this will return a value of 10 for legacy cubins that do not have a properly-encoded binary + architecture version. 
- - + - + The attribute to indicate whether the function has been compiled with + user specified option "-Xptxas --dlcm=ca" set. - - + - + The maximum size in bytes of dynamically-allocated shared memory that can be used by + this function. If the user-specified dynamic shared memory size is larger than this + value, the launch will fail. - - + - + On devices where the L1 cache and shared memory use the same hardware resources, + this sets the shared memory carveout preference, in percent of the total resources. + This is only a hint, and the driver can choose a different ratio if required to execute the function. - - + - + No descritption found... - - + - + Function cache configurations - - + - + No preference for shared memory or L1 (default) - - + - + Function prefers larger shared memory and smaller L1 cache. - - + - + Function prefers larger L1 cache and smaller shared memory. - - + - Resource type + Function prefers equal sized L1 cache and shared memory. - + - Mimics the union in C++ + Cubin matching fallback strategies - + - Flags (must be zero) + Prefer to compile ptx if exact binary match not found - + - Texture descriptor + Prefer to fall back to compatible binary code if exact binary match not found - + - Creates a new CudaTextureDescriptor + Online compiler options - Address modes for all dimensions - Filter mode - Flags - + - Creates a new CudaTextureDescriptor + Max number of registers that a thread may use. + Option type: unsigned int + Applies to: compiler only - Address modes for all dimensions - Filter mode - Flags - borderColor (array of size 4) - + - Creates a new CudaTextureDescriptor + IN: Specifies minimum number of threads per block to target compilation + for + OUT: Returns the number of threads the compiler actually targeted. + This restricts the resource utilization fo the compiler (e.g. max + registers) such that a block with the given number of threads should be + able to launch based on register limitations. Note, this option does not + currently take into account any other resource limitations, such as + shared memory utilization. + Option type: unsigned int + Applies to: compiler only - Address modes for dimension 0 - Address modes for dimension 1 - Address modes for dimension 2 - Filter mode - Flags - + - Creates a new CudaTextureDescriptor + Returns a float value in the option of the wall clock time, in + milliseconds, spent creating the cubin + Option type: float + Applies to: compiler and linker - Address modes for dimension 0 - Address modes for dimension 1 - Address modes for dimension 2 - Filter mode - Flags - borderColor (array of size 4) - + - Creates a new CudaTextureDescriptor + Pointer to a buffer in which to print any log messsages from PTXAS + that are informational in nature (the buffer size is specified via + option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES) + Option type: char* + Applies to: compiler and linker - Address modes for all dimensions - Filter mode - Flags - Maximum anisotropy ratio. Specifies the maximum anistropy ratio to be used when doing anisotropic - filtering. This value will be clamped to the range [1,16]. - Mipmap filter mode. Specifies the filter mode when the calculated mipmap level lies between - two defined mipmap levels. - Mipmap level bias. Specifies the offset to be applied to the calculated mipmap level. - Mipmap minimum level clamp. Specifies the lower end of the mipmap level range to clamp access to. - Mipmap maximum level clamp. Specifies the upper end of the mipmap level range to clamp access to. 
- + - Creates a new CudaTextureDescriptor + IN: Log buffer size in bytes. Log messages will be capped at this size + (including null terminator) + OUT: Amount of log buffer filled with messages + Option type: unsigned int + Applies to: compiler and linker - Address modes for all dimensions - Filter mode - Flags - Maximum anisotropy ratio. Specifies the maximum anistropy ratio to be used when doing anisotropic - filtering. This value will be clamped to the range [1,16]. - Mipmap filter mode. Specifies the filter mode when the calculated mipmap level lies between - two defined mipmap levels. - Mipmap level bias. Specifies the offset to be applied to the calculated mipmap level. - Mipmap minimum level clamp. Specifies the lower end of the mipmap level range to clamp access to. - Mipmap maximum level clamp. Specifies the upper end of the mipmap level range to clamp access to. - borderColor (array of size 4) - + - Creates a new CudaTextureDescriptor + Pointer to a buffer in which to print any log messages from PTXAS that + reflect errors (the buffer size is specified via option + ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES) + Option type: char* + Applies to: compiler and linker - Address modes for dimension 0 - Address modes for dimension 1 - Address modes for dimension 2 - Filter mode - Flags - Maximum anisotropy ratio. Specifies the maximum anistropy ratio to be used when doing anisotropic - filtering. This value will be clamped to the range [1,16]. - Mipmap filter mode. Specifies the filter mode when the calculated mipmap level lies between - two defined mipmap levels. - Mipmap level bias. Specifies the offset to be applied to the calculated mipmap level. - Mipmap minimum level clamp. Specifies the lower end of the mipmap level range to clamp access to. - Mipmap maximum level clamp. Specifies the upper end of the mipmap level range to clamp access to. - + - Creates a new CudaTextureDescriptor + IN: Log buffer size in bytes. Log messages will be capped at this size + (including null terminator) + OUT: Amount of log buffer filled with messages + Option type: unsigned int + Applies to: compiler and linker - Address modes for dimension 0 - Address modes for dimension 1 - Address modes for dimension 2 - Filter mode - Flags - Maximum anisotropy ratio. Specifies the maximum anistropy ratio to be used when doing anisotropic - filtering. This value will be clamped to the range [1,16]. - Mipmap filter mode. Specifies the filter mode when the calculated mipmap level lies between - two defined mipmap levels. - Mipmap level bias. Specifies the offset to be applied to the calculated mipmap level. - Mipmap minimum level clamp. Specifies the lower end of the mipmap level range to clamp access to. - Mipmap maximum level clamp. Specifies the upper end of the mipmap level range to clamp access to. - borderColor (array of size 4) - + - Address modes + Level of optimizations to apply to generated code (0 - 4), with 4 + being the default and highest level of optimizations. + Option type: unsigned int + Applies to: compiler only - + - Filter mode + No option value required. Determines the target based on the current + attached context (default) + Option type: No option value needed + Applies to: compiler and linker - + - Flags + Target is chosen based on supplied ::CUjit_target_enum. This option cannot be + used with cuLink* APIs as the linker requires exact matches. + Option type: unsigned int for enumerated type ::CUjit_target_enum + Applies to: compiler and linker - + - Maximum anisotropy ratio. 
[Documentation-only hunk: updated XML IntelliSense <summary> entries for the CUDA driver API wrapper, regenerated against a newer driver API version. The re-ordered entries cover the JIT compile options (CU_JIT_* fallback strategy, debug/verbose/line-info flags, cache mode, global-symbol relocation, link-time optimization controls for denormals, precise division/sqrt and FMA contraction), online compilation targets (compute classes 2.0 through 8.6), optimization levels, caching modes, device code formats, cubemap face indices, device limits, memory and resource types, the driver error codes (including the kernel-execution, stream-capture, MPS and graph-update errors), P2P attributes, pointer attributes, device attributes, shared-memory bank and carveout configurations, cuStreamBatchMemOp operations, memory-range advise attributes, graph node types, stream capture statuses and exchange modes, external memory and semaphore handle types and descriptors, graph-exec update results, access policy windows and kernel-node/stream attributes, memory allocation compression and access flags, sparse subresource and memory operation types, cuStreamUpdateCaptureDependencies flags, cuGetProcAddress search flags, GPUDirect RDMA write ordering, flush scopes and targets, execution affinity types, graph memory attributes, the handle wrappers (CUarray, CUlinkState, CUmipmappedArray, CUcontext, CUdevice with its memory-pool, UUID, execution-affinity and graph-memory helpers, CUdeviceptr, CUevent, CUfunction with GetModule, CUmodule, CUstream including the legacy and per-thread implicit streams, CUtexref, CUsurfref, CUgraphicsResource, CUtexObject, CUsurfObject, CUgraph, CUgraphNode with its parameter getters/setters, CUgraphExec, CUmemoryPool, CUuserObject), CUuuid/LUID, IPC event and memory handles, half and bfloat16 vector types, legacy device properties, 2D memcpy and kernel launch parameter structs, memset and host node parameters, and the graphics-interop register/map flags, texture-reference flags, initialization, peer-access, stream, cooperative-launch and 3D-array flag enums.]
+ Source X in bytes - + - this flag must be set in order to bind a surface reference - to the CUDA array + Source Y - + - If set, the CUDA array is a collection of six 2D arrays, representing faces of a cube. The - width of such a CUDA array must be equal to its height, and Depth must be six. - If ::CUDA_ARRAY3D_LAYERED flag is also set, then the CUDA array is a collection of cubemaps - and Depth must be a multiple of six. + Source memory type (host, device, array) - + - This flag must be set in order to perform texture gather operations on a CUDA array. + Source host pointer - + - This flag if set indicates that the CUDA array is a DEPTH_TEXTURE. + Source device pointer - + - This flag indicates that the CUDA array may be bound as a color target in an external graphics API + Source array reference - + - CUMemHostAllocFlags. All of these flags are orthogonal to one another: a developer may allocate memory that is portable, mapped and/or - write-combined with no restrictions. + Source pitch (ignored when src is array) - + - No flags + Destination X in bytes - + - The memory returned by this call will be considered as pinned memory - by all CUDA contexts, not just the one that performed the allocation. + Destination Y - + - Maps the allocation into the CUDA address space. The device pointer - to the memory may be obtained by calling . This feature is available only on - GPUs with compute capability greater than or equal to 1.1. + Destination memory type (host, device, array) - + - Allocates the memory as write-combined (WC). WC memory - can be transferred across the PCI Express bus more quickly on some system configurations, but cannot be read - efficiently by most CPUs. WC memory is a good option for buffers that will be written by the CPU and read by - the GPU via mapped pinned memory or host->device transfers. - If set, host memory is allocated as write-combined - fast to write, - faster to DMA, slow to read except via SSE4 streaming load instruction - (MOVNTDQA). + Destination host pointer - + - Context creation flags. - The two LSBs of the flags parameter can be used to control how the OS thread, which owns the CUDA context at - the time of an API call, interacts with the OS scheduler when waiting for results from the GPU. + Destination device pointer - + - The default value if the flags parameter is zero, uses a heuristic based on the - number of active CUDA contexts in the process C and the number of logical processors in the system P. If C > - P, then CUDA will yield to other OS threads when waiting for the GPU, otherwise CUDA will not yield while - waiting for results and actively spin on the processor. + Destination array reference - + - Instruct CUDA to actively spin when waiting for results from the GPU. This can decrease - latency when waiting for the GPU, but may lower the performance of CPU threads if they are performing - work in parallel with the CUDA thread. + Destination pitch (ignored when dst is array) - + - Instruct CUDA to yield its thread when waiting for results from the GPU. This can - increase latency when waiting for the GPU, but can increase the performance of CPU threads performing work - in parallel with the GPU. + Width of 2D memory copy in bytes - + - Instruct CUDA to block the CPU thread on a synchronization primitive when waiting for the GPU to finish work. + Height of 2D memory copy - + - No description found... + 3D memory copy parameters - + - Instruct CUDA to support mapped pinned allocations. 
This flag must be set in order to allocate pinned host memory that is accessible to the GPU. + Source X in bytes - + - Instruct CUDA to not reduce local memory after resizing local memory - for a kernel. This can prevent thrashing by local memory allocations when launching many kernels with high - local memory usage at the cost of potentially increased memory usage. + Source Y - + - No description found... + Source Z - + - CUMemHostRegisterFlags. All of these flags are orthogonal to one another: a developer may allocate memory that is portable or mapped - with no restrictions. + Source LOD - + - No flags + Source memory type (host, device, array) - + - The memory returned by this call will be considered as pinned memory - by all CUDA contexts, not just the one that performed the allocation. + Source host pointer - + - Maps the allocation into the CUDA address space. The device pointer - to the memory may be obtained by calling . This feature is available only on - GPUs with compute capability greater than or equal to 1.1. + Source device pointer - + - If set, the passed memory pointer is treated as pointing to some - memory-mapped I/O space, e.g. belonging to a third-party PCIe device. - On Windows the flag is a no-op. - On Linux that memory is marked as non cache-coherent for the GPU and - is expected to be physically contiguous. - On all other platforms, it is not supported and CUDA_ERROR_INVALID_VALUE - is returned. + Source array reference - + - Flag for cuStreamAddCallback() + Must be NULL - + - No flags + Source pitch (ignored when src is array) - + - Event creation flags + Source height (ignored when src is array; may be 0 if Depth==1) - + - Default event creation flag. + Destination X in bytes - + - Specifies that event should use blocking synchronization. A CPU thread - that uses to wait on an event created with this flag will block until the event has actually - been recorded. + Destination Y - + - Event will not record timing data + Destination Z - + - Event is suitable for interprocess use. CUEventFlags.DisableTiming must be set + Destination LOD - + - Flags for ::cuStreamWaitValue32 + Destination memory type (host, device, array) - + - Wait until (int32_t)(*addr - value) >= 0 (or int64_t for 64 bit values). Note this is a cyclic comparison which ignores wraparound. (Default behavior.) + Destination host pointer - + - Wait until *addr == value. + Destination device pointer - + - Wait until (*addr & value) != 0. + Destination array reference - + - Wait until ~(*addr | value) != 0. Support for this operation can be - queried with ::cuDeviceGetAttribute() and ::CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR. - Generally, this requires compute capability 7.0 or greater. + Must be NULL - + - Follow the wait operation with a flush of outstanding remote writes. This - means that, if a remote write operation is guaranteed to have reached the - device before the wait can be satisfied, that write is guaranteed to be - visible to downstream device work. The device is permitted to reorder - remote writes internally. For example, this flag would be required if - two remote writes arrive in a defined order, the wait is satisfied by the - second write, and downstream work needs to observe the first write. 
+ Destination pitch (ignored when dst is array) - + - Flags for ::cuStreamWriteValue32 + Destination height (ignored when dst is array; may be 0 if Depth==1) - + - Default behavior + Width of 3D memory copy in bytes - + - Permits the write to be reordered with writes which were issued - before it, as a performance optimization. Normally, ::cuStreamWriteValue32 will provide a memory fence before the - write, which has similar semantics to __threadfence_system() but is scoped to the stream rather than a CUDA thread. + Height of 3D memory copy - + - Indicates that the external memory object is a dedicated resource + Depth of 3D memory copy - + - No flags + 3D memory copy parameters - + - Indicates that the external memory object is a dedicated resource + Source X in bytes - + - CUDA stream callback + Source Y - The stream the callback was added to, as passed to ::cuStreamAddCallback. May be NULL. - CUDA_SUCCESS or any persistent error on the stream. - User parameter provided at registration. - + - Block size to per-block dynamic shared memory mapping for a certain - kernel. - e.g.: - If no dynamic shared memory is used: x => 0 - If 4 bytes shared memory per thread is used: x = 4 * x + Source Z - block size - The dynamic shared memory needed by a block - + - CUDA host function + Source LOD - Argument value passed to the function - + - An abstraction layer for the CUDA driver API + Source memory type (host, device, array) - + - Specifies the directX version to use with a cuda context, if necessary + Source host pointer - + - DirectX9 + Source device pointer - + - DirectX10 + Source array reference - + - DirectX11 + Source context (ignored with srcMemoryType is array) - + - Defines for GPU Architecture types (using the SM version to determine the # of cores per SM + Source pitch (ignored when src is array) - + - 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version + Source height (ignored when src is array; may be 0 if Depth==1) - - - - - - - - - - - - - - + + + Destination X in bytes + - - + + + Destination Y + - + - Create a new instace of managed Cuda. Creates a new cuda context. - Using device with ID 0 and + Destination Z - + - Create a new instace of managed Cuda. - If createNew is true, a new cuda context will be created. - If createNew is false, the CudaContext is bound to an existing cuda context. Creates a new context if no context exists. - Using device with ID 0 and + Destination LOD - - + - Create a new instace of managed Cuda. Creates a new cuda context. - Using + Destination memory type (host, device, array) - DeviceID - + - Create a new instace of managed Cuda. - If createNew is true, a new cuda context will be created. - If createNew is false, the CudaContext bounds to an existing cuda context. Creates a new context if no context exists. + Destination host pointer - DeviceID - - + - Create a new instace of managed Cuda. Creates a new cuda context. + Destination device pointer - DeviceID. - Context creation flags. - + - Create a new instace of a cuda context from the given CudaStream + Destination array reference - The stream to query - + - Create a new instace of managed Cuda + Destination context (ignored with dstMemoryType is array) - DeviceID. - Context creation flags. - Create a new CUDA context or use an exiting context for the calling thread. Creates a new context if no context exists. - + - Create a new instance of managed CUDA for a given Direct3DX-device. 
- Direct3D resources from this device may be registered and mapped through the lifetime of this CUDA context. + Destination pitch (ignored when dst is array) - Direct3D device - Context creation flags - DirectX Version to bind this context to (9, 10, 11) - + - Create a new instance of managed CUDA for a given Direct3DX-device. - Direct3D resources from this device may be registered and mapped through the lifetime of this CUDA context. - Use to obtain a list of possible values for cudaDevice. + Destination height (ignored when dst is array; may be 0 if Depth==1) - CUdevice to map this context to. Use to obtain a list of possible values - Direct3D device. - Context creation flags - DirectX (9, 10, 11) Version to bind this context to - + - As the normal context constructor has the same arguments, the OpenGL-constructor is private with inverse arguement order. - It has to be called from a static method. - Create a new instance of managed CUDA for a OpenGL-device. - OpenGL resources from this device may be registered and mapped through the lifetime of this CUDA context. + Width of 3D memory copy in bytes - CUdevice to map this context to. - Context creation flags - + - Create a new instace of managed Cuda, performing no CUDA API calls. Needed for inheritance. + Height of 3D memory copy - Additional constructor parameter to differentiate direct constructor call or inherited call, i.e. called by primaryContext class. - DeviceID. - + - For dispose + Depth of 3D memory copy - + - Dispose + Array descriptor - + - For IDisposable. - Note: If this instance created the wrapped CUcontext, it will be destroyed and can't be accessed by other threads anymore. - If this instance only was bound to an existing CUcontext, the wrapped CUcontext won't be destroyed. + Width of array - - + - Make sure the kernel image arrays are zero terminated by appending a zero + Height of array - + - Gets the context's API version + Array format - Version - + - Blocks until the device has completed all preceding requested tasks. Throws a if one of the - preceding tasks failed. If the context was created with the flag, the CPU thread will - block until the GPU context has finished its work. + Channels per array element - + - Push the CUDA context + 3D array descriptor - + - Pop the CUDA context + Width of 3D array - + - Binds this CUDA context to the calling CPU thread + Height of 3D array - + - Sets the shared memory configuration for the current context. - On devices with configurable shared memory banks, this function will set - the context's shared memory bank size which is used for subsequent kernel - launches. - Changed the shared memory configuration between launches may insert a device - side synchronization point between those launches. - Changing the shared memory bank size will not increase shared memory usage - or affect occupancy of kernels, but may have major effects on performance. - Larger bank sizes will allow for greater potential bandwidth to shared memory, - but will change what kinds of accesses to shared memory will result in bank - conflicts. - This function will do nothing on devices with fixed shared memory bank size. - - The supported bank configurations are: - - : set bank width to the default initial - setting (currently, four bytes). - - : set shared memory bank width to - be natively four bytes. - - : set shared memory bank width to - be natively eight bytes. + Depth of 3D array - + - Returns the current shared memory configuration for the current context. 
+ Array format - + - Load a CUBIN-module from file + Channels per array element - - - + - Load a PTX module from file + Flags - - - - - + - Load a PTX module from file + Idea of a SizeT type from http://blogs.hoopoe-cloud.com/index.php/tag/cudanet/, entry from Tuesday, September 15th, 2009 - - Collection of linker and compiler options - - + - Load a PTX module from file + - - + - + - Load a ptx module from image as byte[] + - - Collection of linker and compiler options - + - + - Load a ptx module from image as byte[] + - - - - + - + - Load a ptx module from image as stream + - - - - + - + - Load a ptx module from image as stream + - - Collection of linker and compiler options - + - + - Load a ptx module from image as byte[] + - - + - + - Load a ptx module from image as stream + - + - + - Load a CUBIN-module from file and return directly a wrapped CudaKernel + - Path and name of the module file - The kernel name as defined in the *.cu file + - + - Load a PTX module from file and return directly a wrapped CudaKernel + - Path and name of the ptx-module file - The kernel name as defined in the *.cu file - JIT-compile options. Only if module image is a ptx module - JIT-compile options values. Only if module image is a ptx module + - + - Load a PTX module from file and return directly a wrapped CudaKernel + - Path and name of the ptx-module file - The kernel name as defined in the *.cu file - Collection of linker and compiler options. Only if module image is a ptx module + - + - Load a PTX module from file and return directly a wrapped CudaKernel + - Path and name of the ptx-module file - The kernel name as defined in the *.cu file + - + - Load a ptx module from image as byte[] and return directly a wrapped CudaKernel + - Module image (cubin or PTX) as byte[] - The kernel name as defined in the *.cu file - JIT-compile options. Only if module image is a ptx module - JIT-compilt options values. Only if module image is a ptx module + - + - Load a ptx module from image as byte[] and return directly a wrapped CudaKernel + - Module image (cubin or PTX) as byte[] - The kernel name as defined in the *.cu file - Collection of linker and compiler options. Only if module image is a ptx module + - + - Load a ptx module from image as stream and return directly a wrapped CudaKernel + - Module image (cubin or PTX) as stream - The kernel name as defined in the *.cu file - JIT-compile options. Only if module image is a ptx module - JIT-compilt options values. Only if module image is a ptx module + - + - Load a ptx module from image as stream and return directly a wrapped CudaKernel + - Module image (cubin or PTX) as stream - The kernel name as defined in the *.cu file - Collection of linker and compiler options. 
Only if module image is a ptx module + - + - Load a ptx module from image as byte[] and return directly a wrapped CudaKernel + - Module image (cubin or PTX) as byte[] - The kernel name as defined in the *.cu file + - + - Load a ptx module from image as stream and return directly a wrapped CudaKernel + - Module image (cubin or PTX) as stream - The kernel name as defined in the *.cu file + - + - Load a FatBinary module from image as byte[] + - + - + - Load a FatBinary module from image as stream + - + + - + - Load a FatBinary module from image as byte[] and return directly a wrapped CudaKernel + - Module image (fat binary) as byte[] - The kernel name as defined in the *.cu file + + - + - Load a FatBinary module from image as stream and return directly a wrapped CudaKernel + Define operator + on converted to ulong values to avoid fall back to int - Module image (fat binary) as stream - The kernel name as defined in the *.cu file + + - + - unload module + Define operator + on converted to ulong values to avoid fall back to int - + + + - + - unload kernel + Define operator + on converted to ulong values to avoid fall back to int - + + + - + - Allocate memory on the device + Define operator + on converted to ulong values to avoid fall back to int - + + - + - SetMemory (cuMemsetD8) + Define operator + on converted to ulong values to avoid fall back to int - - - + + + - + - SetMemory (cuMemsetD16) + Define operator - on converted to ulong values to avoid fall back to int - - - + + + - + - SetMemory (cuMemsetD32) + Define operator - on converted to ulong values to avoid fall back to int - - - + + + - + - SetMemory (cuMemset2DD8) + Define operator - on converted to ulong values to avoid fall back to int - - - - - + + + - + - SetMemory (cuMemset2DD16) + Define operator - on converted to ulong values to avoid fall back to int - - - - - + + + - + - SetMemory (cuMemset2DD32) + Define operator - on converted to ulong values to avoid fall back to int - - - - - + + + - + - SetMemory (cuMemsetD8) + Define operator * on converted to ulong values to avoid fall back to int - - - - + + + - + - SetMemory (cuMemsetD16) + Define operator * on converted to ulong values to avoid fall back to int - - - - + + + - + - SetMemory (cuMemsetD32) + Define operator * on converted to ulong values to avoid fall back to int - - - - + + + - + - SetMemory (cuMemset2DD8) + Define operator * on converted to ulong values to avoid fall back to int - - - - - - + + + - + - SetMemory (cuMemset2DD16) + Define operator * on converted to ulong values to avoid fall back to int - - - - - - + + + - + - SetMemory (cuMemset2DD32) + Define operator / on converted to ulong values to avoid fall back to int - - - - - - + + + - + - Free device memory + Define operator / on converted to ulong values to avoid fall back to int - + + + - + - Returns the total device memory in bytes + Define operator / on converted to ulong values to avoid fall back to int + + - + - Returns the free available device memory in bytes + Define operator / on converted to ulong values to avoid fall back to int + + - + - Queries if a device may directly access a peer device's memory + Define operator / on converted to ulong values to avoid fall back to int + + - + - On devices where the L1 cache and shared memory use the same hardware - resources, this returns the preferred cache configuration - for the current context. This is only a preference. 
The driver will use - the requested configuration if possible, but it is free to choose a different - configuration if required to execute functions. - This will return on devices - where the size of the L1 cache and shared memory are fixed. + Define operator > on converted to ulong values to avoid fall back to int + + - + - On devices where the L1 cache and shared memory use the same hardware - resources, this sets through cacheConfig the preferred cache configuration for - the current context. This is only a preference. The driver will use - the requested configuration if possible, but it is free to choose a different - configuration if required to execute the function. Any function preference - set via will be preferred over this context-wide - setting. Setting the context-wide cache configuration to - will cause subsequent kernel launches to prefer - to not change the cache configuration unless required to launch the kernel. - This setting does nothing on devices where the size of the L1 cache and - shared memory are fixed. - Launching a kernel with a different preference than the most recent - preference setting may insert a device-side synchronization point. + Define operator > on converted to ulong values to avoid fall back to int - + + + - + - Copy data from host to device memory + Define operator > on converted to ulong values to avoid fall back to int - Destination CUdeviceptr (Pointer to device memory) - Source array - Number of bytes to copy + + + - + - Copy data from host to device memory + Define operator > on converted to ulong values to avoid fall back to int - T must be of value type, i.e. a struct - Destination CUdeviceptr (Pointer to device memory) - Source pointer to host memory - - + + + + + - Copy data from host to device memory + Define operator > on converted to ulong values to avoid fall back to int - T must be of value type, i.e. 
a struct - Destination CUdeviceptr (Pointer to device memory) - Source pointer to host memory + + + - + - Copy data from host to device memory + Define operator < on converted to ulong values to avoid fall back to int - Destination CUdeviceptr (Pointer to device memory) - Source array + + + - + - Copy data from host to device memory + Define operator < on converted to ulong values to avoid fall back to int - Destination CUdeviceptr (Pointer to device memory) - Source array + + + - + - Copy data from host to device memory + Define operator < on converted to ulong values to avoid fall back to int - Destination CUdeviceptr (Pointer to device memory) - Source array + + + - + - Copy data from host to device memory + Define operator < on converted to ulong values to avoid fall back to int - Destination CUdeviceptr (Pointer to device memory) - Source array + + + - + - Copy data from host to device memory + Define operator < on converted to ulong values to avoid fall back to int - Destination CUdeviceptr (Pointer to device memory) - Source array + + + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + + - + - Copy data from host to device memory + returns this.value.ToString() - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + Returns this.value.GetHashCode() - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + Inner struct for CudaResourceDesc - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Device pointer - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Array format - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Channels per array element - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Size in bytes - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Inner struct for CudaResourceDesc - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Device pointer - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Array format - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Channels per array element - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Width of the array in elements - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Height of the array in elements - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Pitch between two rows in bytes - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Mimics the union "CUDA_RESOURCE_DESC.res" in cuda.h - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + CUDA array - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + CUDA mipmapped array - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data 
from host to device memory + Linear memory - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + Linear pitched 2D memory - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + CUDA Resource descriptor - Destination CUdeviceptr (Pointer to device memory) - Source array - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source array + - + - Copy data from host to device memory + - Destination 
CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value + - + - Copy data from host to device memory + Resource type - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Mimics the union in C++ - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Flags (must be zero) - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Texture descriptor - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Creates a new CudaTextureDescriptor - Destination CUdeviceptr (Pointer to device memory) - Source value + Address modes for all dimensions + Filter mode + Flags - + - Copy data from host to device memory + Creates a new CudaTextureDescriptor - Destination CUdeviceptr (Pointer to device memory) - Source value + Address modes for all dimensions + Filter mode + Flags + borderColor (array of size 4) - + - Copy data from host to device memory + Creates a new CudaTextureDescriptor - Destination CUdeviceptr (Pointer to device memory) - Source value + Address modes for dimension 0 + Address modes for dimension 1 + Address 
modes for dimension 2 + Filter mode + Flags - + - Copy data from host to device memory + Creates a new CudaTextureDescriptor - Destination CUdeviceptr (Pointer to device memory) - Source value + Address modes for dimension 0 + Address modes for dimension 1 + Address modes for dimension 2 + Filter mode + Flags + borderColor (array of size 4) - + - Copy data from host to device memory + Creates a new CudaTextureDescriptor - Destination CUdeviceptr (Pointer to device memory) - Source value + Address modes for all dimensions + Filter mode + Flags + Maximum anisotropy ratio. Specifies the maximum anistropy ratio to be used when doing anisotropic + filtering. This value will be clamped to the range [1,16]. + Mipmap filter mode. Specifies the filter mode when the calculated mipmap level lies between + two defined mipmap levels. + Mipmap level bias. Specifies the offset to be applied to the calculated mipmap level. + Mipmap minimum level clamp. Specifies the lower end of the mipmap level range to clamp access to. + Mipmap maximum level clamp. Specifies the upper end of the mipmap level range to clamp access to. - + - Copy data from host to device memory + Creates a new CudaTextureDescriptor - Destination CUdeviceptr (Pointer to device memory) - Source value + Address modes for all dimensions + Filter mode + Flags + Maximum anisotropy ratio. Specifies the maximum anistropy ratio to be used when doing anisotropic + filtering. This value will be clamped to the range [1,16]. + Mipmap filter mode. Specifies the filter mode when the calculated mipmap level lies between + two defined mipmap levels. + Mipmap level bias. Specifies the offset to be applied to the calculated mipmap level. + Mipmap minimum level clamp. Specifies the lower end of the mipmap level range to clamp access to. + Mipmap maximum level clamp. Specifies the upper end of the mipmap level range to clamp access to. + borderColor (array of size 4) - + - Copy data from host to device memory + Creates a new CudaTextureDescriptor - Destination CUdeviceptr (Pointer to device memory) - Source value + Address modes for dimension 0 + Address modes for dimension 1 + Address modes for dimension 2 + Filter mode + Flags + Maximum anisotropy ratio. Specifies the maximum anistropy ratio to be used when doing anisotropic + filtering. This value will be clamped to the range [1,16]. + Mipmap filter mode. Specifies the filter mode when the calculated mipmap level lies between + two defined mipmap levels. + Mipmap level bias. Specifies the offset to be applied to the calculated mipmap level. + Mipmap minimum level clamp. Specifies the lower end of the mipmap level range to clamp access to. + Mipmap maximum level clamp. Specifies the upper end of the mipmap level range to clamp access to. - + - Copy data from host to device memory + Creates a new CudaTextureDescriptor - Destination CUdeviceptr (Pointer to device memory) - Source value + Address modes for dimension 0 + Address modes for dimension 1 + Address modes for dimension 2 + Filter mode + Flags + Maximum anisotropy ratio. Specifies the maximum anistropy ratio to be used when doing anisotropic + filtering. This value will be clamped to the range [1,16]. + Mipmap filter mode. Specifies the filter mode when the calculated mipmap level lies between + two defined mipmap levels. + Mipmap level bias. Specifies the offset to be applied to the calculated mipmap level. + Mipmap minimum level clamp. Specifies the lower end of the mipmap level range to clamp access to. + Mipmap maximum level clamp. 
Specifies the upper end of the mipmap level range to clamp access to. + borderColor (array of size 4) - + - Copy data from host to device memory + Address modes - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Filter mode - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Flags - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Maximum anisotropy ratio. Specifies the maximum anistropy ratio to be used when doing anisotropic + filtering. This value will be clamped to the range [1,16]. - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Mipmap filter mode. Specifies the filter mode when the calculated mipmap level lies between + two defined mipmap levels. - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Mipmap level bias. Specifies the offset to be applied to the calculated mipmap level. - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Mipmap minimum level clamp. Specifies the lower end of the mipmap level range to clamp access to. - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Mipmap maximum level clamp. Specifies the upper end of the mipmap level range to clamp access to. - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Border Color - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Resource view descriptor - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Resource view format - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Width of the resource view - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Height of the resource view - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Depth of the resource view - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + First defined mipmap level - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Last defined mipmap level - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + First layer index - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + Last layer index - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + GPU Direct v3 tokens - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from host to device memory + - Destination CUdeviceptr (Pointer to device memory) - Source value - + - Copy data from device to host memory + - T must be of value type, i.e. a struct - Destination data in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Per-operation parameters for ::cuStreamBatchMemOp - T must be of value type, i.e. 
a struct - Destination data in host memory - Source CUdeviceptr (Pointer to device memory) - + + + + + + + + + + + + + + + + + + + + + + - Copy data from device to host memory + For driver internal use. Initial value is unimportant. - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + + + + + + + + + + + + + + + + - Copy data from device to host memory + For driver internal use. Initial value is unimportant. - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + + + + + + + + + + + + + + + + + + + + + + + + + - Copy data from device to host memory + Kernel launch parameters - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Kernel to launch - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Width of grid in blocks - Destination pointer to host memory - Source CUdeviceptr (Pointer to device memory) - Number of bytes to copy - + - Copy data from device to host memory + Height of grid in blocks - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Depth of grid in blocks - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + X dimension of each thread block - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Y dimension of each thread block - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Z dimension of each thread block - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Dynamic shared-memory size per thread block in bytes - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Stream identifier - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Array of pointers to kernel parameters - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + GPU kernel node parameters - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Kernel to launch - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Width of grid in blocks - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Height of grid in blocks - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Depth of grid in blocks - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + X dimension of each thread block - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Y dimension of each thread block - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Z dimension of each thread block - Destination array in host memory - Source CUdeviceptr 
(Pointer to device memory) - + - Copy data from device to host memory + Dynamic shared-memory size per thread block in bytes - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Array of pointers to kernel parameters - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Extra options - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Memset node parameters - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Destination device pointer - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Pitch of destination device pointer. Unused if height is 1 - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Value to be set - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Size of each element in bytes. Must be 1, 2, or 4. - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Width of the row in elements - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Number of rows - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Initialieses the struct - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) + + + + - + - Copy data from device to host memory + Initialieses the struct - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) + + + + - + - Copy data from device to host memory + Host node parameters - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + The function to call when the node executes - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Argument to pass to the function - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Win32 handle referencing the semaphore object. Valid when + type is one of the following: + - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 + - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT + - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP + - ::CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE + Exactly one of 'handle' and 'name' must be non-NULL. If + type is + ::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT + then 'name' must be NULL. - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Valid NT handle. Must be NULL if 'name' is non-NULL - Destination array in host memory - Source CUdeviceptr (Pointer to device memory) - + - Copy data from device to host memory + Name of a valid memory object. Must be NULL if 'handle' is non-NULL. 
[Remainder of the diff: regenerated XML documentation summaries for the ManagedCuda wrapper types used by ConvNetSharp.Volume.GPU, replacing the previous copy-overload summaries. New entries describe the external memory handle descriptors (POSIX file descriptor, Win32 handle, NvSciBuf object; handle type, allocation size, CUDA_EXTERNAL_MEMORY_DEDICATED flag), external semaphore handle descriptors, external memory buffer and mipmapped-array descriptors (offset, size, base-level format, level count), external semaphore signal and wait parameters (fence value, NvSciSyncFence pointer, keyed-mutex key and acquire timeout), CUmemLocation (location type and id), the allocation hint for compressible memory, the allocation properties struct (allocation type, requested handle type, location, Win32 LPSECURITYATTRIBUTES, allocFlags), memory access descriptors (location plus CUmemProt flags), the access policy window (base_ptr, num_bytes, hitRatio, hitProp, missProp), and the kernel-node and stream attribute unions used with cuKernelNodeSetAttribute/cuStreamSetAttribute.]
[Further new entries cover the CUDA array sparse properties (tile extent in width/height/depth, first mip level of the mip tail, total mip-tail size, CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL flag), the array/mipmapped-array memory-mapping descriptor (resource, mip level or layer index, X/Y/Z offsets and extents, subresource type, memory operation and handle types, offset within the memory, device-ordinal bit mask, reserved flags), semaphore signal and wait node parameters, the SM-count execution-affinity value and parameter struct, and the memory-allocation graph-node parameters (location, peer-access descriptors and their count). The matching removals drop the older CudaContext device-name/compute-capability/stream-priority, cuCtxGetLimit/cuCtxSetLimit, OpenGL/Direct3D interop, peer-access and profiler summaries, as well as the CudaArray1D constructor and copy-overload summaries.]
[Next come the allocation-node outputs (requested size, returned device pointer), the CudaDataType/.NET type translation helper, and the CudaContext abstraction layer: the DirectX 9/10/11 interop enum, constructors taking a device ID, context-creation flags, an existing CudaStream, execution affinity, a Direct3D device or an OpenGL device, plus the protected constructor that performs no CUDA API calls; disposal semantics (the wrapped CUcontext is destroyed only if this instance created it), zero-termination of kernel image arrays, the context API-version query, Synchronize, PushContext/PopContext/SetCurrent, and the shared-memory bank-size configuration getter and setter.]
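Since the entries above describe the CudaContext lifecycle, a minimal usage sketch may help. It assumes the ManagedCuda package already referenced by ConvNetSharp.Volume.GPU and an arbitrary device ordinal 0; the member names follow the documented summaries, not ConvNetSharp's own code.

using System;
using ManagedCuda;

static class ContextSketch
{
    static void Main()
    {
        // Creates a new CUDA context on device 0 and binds it to the calling thread.
        using (var ctx = new CudaContext(0))
        {
            Console.WriteLine($"Bound to CUDA device {ctx.DeviceId}");

            // Block the CPU thread until all work queued on this context has finished.
            ctx.Synchronize();
        } // Dispose destroys the wrapped CUcontext because this instance created it.
    }
}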
[Module and kernel management follows: LoadModule (CUBIN), LoadModulePTX and LoadModuleFatBin from a file path, byte[] or stream, optionally with JIT compile/link options; the LoadKernel/LoadKernelPTX/LoadKernelFatBin convenience overloads that return a wrapped CudaKernel directly; UnloadModule and UnloadKernel; AllocateMemory; and the SetMemory overloads wrapping cuMemsetD8/D16/D32 and cuMemset2DD8/D16/D32.]
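To connect the module-loading and SetMemory entries, here is a hedged sketch of loading a PTX kernel and launching it. The file kernels.ptx and the scale entry point are hypothetical, and the calls used (LoadKernelPTX, AllocateMemory, SetMemory, CudaKernel.Run) are assumed to match the overloads summarized above.

using ManagedCuda;
using ManagedCuda.BasicTypes;

static class KernelSketch
{
    static void Main()
    {
        using (var ctx = new CudaContext(0))
        {
            // Hypothetical PTX file and entry point; LoadKernelPTX loads the module
            // and returns a wrapped CudaKernel in one call.
            CudaKernel scale = ctx.LoadKernelPTX("kernels.ptx", "scale");

            int n = 1024;
            CUdeviceptr buffer = ctx.AllocateMemory(n * sizeof(float));
            ctx.SetMemory(buffer, (byte)0, n * sizeof(float)); // cuMemsetD8: zero the buffer

            scale.BlockDimensions = 256;
            scale.GridDimensions = (n + 255) / 256;
            scale.Run(buffer, n, 2.0f); // arguments of the hypothetical kernel

            ctx.FreeMemory(buffer);
        }
    }
}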
[The memory-management entries continue with FreeMemory, the total and free device-memory queries, the peer-access capability query, and the L1/shared-memory cache-configuration getter and setter, followed by the large family of CopyToDevice overloads: generic value types, raw host pointers with an explicit byte count, and typed overloads for arrays and single scalar values of the primitive and vector types.]
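The copy overloads above pair with matching CopyToHost methods. The following sketch shows one plausible round trip through a raw device allocation, again assuming the CudaContext surface documented above.

using ManagedCuda;
using ManagedCuda.BasicTypes;

static class CopySketch
{
    static void Main()
    {
        using (var ctx = new CudaContext(0))
        {
            var host = new float[256];
            for (int i = 0; i < host.Length; i++) host[i] = i;

            CUdeviceptr devPtr = ctx.AllocateMemory(host.Length * sizeof(float));
            ctx.CopyToDevice(devPtr, host);    // host -> device, typed array overload

            var roundTrip = new float[host.Length];
            ctx.CopyToHost(roundTrip, devPtr); // device -> host, typed array overload

            ctx.FreeMemory(devPtr);
        }
    }
}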
[Then the matching CopyToHost overloads, the CudaEvent wrapper (creation with optional flags, Record with or without a stream, Synchronize, a query for whether the event has been recorded, and elapsed-time measurement between two recorded events in milliseconds), and the CudaGraph wrapper, whose nodes are destroyed together with the graph on Dispose.]
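The CudaEvent entries describe the usual start/stop timing pattern. A hedged sketch follows; it assumes the elapsed-time computation is exposed as a static CudaEvent.ElapsedTime helper (an assumption here, since only the behavior is documented above), and it needs a current context for event creation.

using System;
using ManagedCuda;

static class EventTimingSketch
{
    static void Main()
    {
        using (var ctx = new CudaContext(0)) // a context must be current to create events
        using (var start = new CudaEvent())
        using (var stop = new CudaEvent())
        {
            start.Record();        // recorded after all preceding work in the context
            // ... enqueue kernels or memory copies here ...
            stop.Record();
            stop.Synchronize();    // block until the stop event has actually been recorded

            float ms = CudaEvent.ElapsedTime(start, stop);
            Console.WriteLine($"GPU section took {ms} ms");
        }
    }
}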
- When the graph is launched, the node will perform the memset described by memsetParams. + Copy data from host to device memory - can be null - When the graph is launched, the node will perform the memset on deviceVariable. - Value to set - Cuda context used for the operation - A handle to the new node will be returned. + Destination CUdeviceptr (Pointer to device memory) + Source value - + - Creates a memset node and adds it to a graph - Creates a new memset node and adds it to graph with - dependencies specified via dependencies. - It is possible for dependencies to be null, in which case the node will be placed - at the root of the graph. Dependencies may not have any duplicate entries. - The element size must be 1, 2, or 4 bytes. - When the graph is launched, the node will perform the memset described by memsetParams. + Copy data from host to device memory - can be null - When the graph is launched, the node will perform the memset on deviceVariable. - Value to set - Cuda context used for the operation - A handle to the new node will be returned. + Destination CUdeviceptr (Pointer to device memory) + Source value - + - Creates a memcpy node and adds it to a graph - Creates a new memcpy node and adds it to graph with - dependencies specified via dependencies. - It is possible for dependencies to be null, in which case the node will be placed - at the root of the graph. Dependencies may not have any duplicate entries. - A handle to the new node will be returned. - When the graph is launched, the node will perform the memcpy described by copyParams. - See ::cuMemcpy3D() for a description of the structure and its restrictions. - Memcpy nodes have some additional restrictions with regards to managed memory, if the - system contains at least one device which has a zero value for the device attribute - ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. If one or more of the operands refer - to managed memory, then using the memory type ::CU_MEMORYTYPE_UNIFIED is disallowed - for those operand(s). The managed memory will be treated as residing on either the - host or the device, depending on which memory type is specified. + Copy data from host to device memory - can be null - Parameters for the memory copy - Cuda context used for the operation - A handle to the new node will be returned. + Destination CUdeviceptr (Pointer to device memory) + Source value - + - Creates a kernel execution node and adds it to a graph - Creates a new kernel execution node and adds it to the graph with - dependencies specified via dependencies and arguments specified in nodeParams. - It is possible for dependencies to be null, in which case the node will be placed - at the root of the graph. Dependencies may not have any duplicate entries. - A handle to the new node will be returned. + Copy data from device to host memory - can be null - Parameters for the GPU execution node - A handle to the new node will be returned. + T must be of value type, i.e. a struct + Destination data in host memory + Source CUdeviceptr (Pointer to device memory) - + - Creates a kernel execution node and adds it to a graph - Creates a new kernel execution node and adds it to the graph with - dependencies specified via dependencies and arguments specified in nodeParams. - It is possible for dependencies to be null, in which case the node will be placed - at the root of the graph. Dependencies may not have any duplicate entries. - A handle to the new node will be returned. 
+ Copy data from device to host memory - can be null - Kernel to execute - Kernel parameters to pass. An Array of IntPtr each of them pointing to a parameters. Note that the parameters must be pinned by GC! - Extra data - A handle to the new node will be returned. + T must be of value type, i.e. a struct + Destination data in host memory + Source CUdeviceptr (Pointer to device memory) - + - Creates a child graph node and adds it to a graph - Creates a new node which executes an embedded graph, and adds it to this Graph with - dependencies specified via dependencies. - It is possible for dependencies to be null, in which case the node will be placed - at the root of the graph. Dependencies may not have any duplicate entries. - A handle to the new node will be returned. - The node executes an embedded child graph. The child graph is cloned in this call. + Copy data from device to host memory - can be null - - A handle to the new node will be returned. + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Creates a host execution node and adds it to a graph - Creates a new CPU execution node and adds it to the graph with - dependencies specified via dependencies. - It is possible for dependencies to be null, in which case the node will be placed - at the root of the graph. Dependencies may not have any duplicate entries. - A handle to the new node will be returned. - When the graph is launched, the node will invoke the specified CPU function. + Copy data from device to host memory - can be null - Host function to execute - User data for host function. Note that the data object must be pinned by GC! - A handle to the new node will be returned. + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Clones a graph - This function creates a copy of the original Graph. - All parameters are copied into the cloned graph. The original graph may be modified - after this call without affecting the clone. - Child graph nodes in the original graph are recursively copied into the clone. + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Finds a cloned version of a node - This function returns the node corresponding to originalNode - in the original graph. - This cloned graph must have been cloned from the original Graph via its Clone() method. - OriginalNode must have been in that graph at the time of the call to - Clone(), and the corresponding cloned node in this graph must not have - been removed. The cloned node is then returned. + Copy data from device to host memory - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Returns a graph's nodes + Copy data from device to host memory - + Destination pointer to host memory + Source CUdeviceptr (Pointer to device memory) + Number of bytes to copy - + - Returns a graph's root nodes + Copy data from device to host memory - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Returns a graph's dependency edges + Copy data from device to host memory - - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Adds dependency edges to a graph - Elements in from and to at corresponding indices define a dependency. - Each node in from and to must belong to this Graph. - Specifying an existing dependency will return an error. 
+ Copy data from device to host memory - - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Removes dependency edges to a graph - Elements in from and to at corresponding indices define a dependency. - Each node in from and to must belong to this Graph. - Specifying an existing dependency will return an error. + Copy data from device to host memory - - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Creates an executable graph from a graph - Instantiates this Graph as an executable graph. The graph is validated for any - structural constraints or intra-node constraints which were not previously - validated. If instantiation is successful, a handle to the instantiated graph - is returned. + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Returns the inner graph handle + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Represents an executable Cuda graph. + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - For clone graph method + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - For dispose + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Dispose + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - For IDisposable + Copy data from device to host memory - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Launches an executable graph in a stream. - Only one instance of GraphExec may be executing - at a time. Each launch is ordered behind both any previous work in Stream - and any previous launches of GraphExec.To execute a graph concurrently, it must be - instantiated multiple times into multiple executable graphs. + Copy data from device to host memory - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Returns the inner executable graph handle + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - A list of JIT compiler / linker option passed to Cuda. - If buffer options are used (i.e. InfoLogBuffer and ErrorLogBuffer), this - collection should only be used once as buffer size is overwritten by Cuda. - To copy data from unmanaged to managed memory, call after - the API call that produced output data. - Maximum number of options is limited to 30. + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - - - - + - Add a single option to the collection. + Copy data from device to host memory - Option to add + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - A multiple options to the collection. + Copy data from device to host memory - Options to add + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Copy data from unmanaged to managed memory + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Reset values returned from Cuda API for info and error buffers. 
+ Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - For dispose + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Dispose + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - For IDisposable + Copy data from device to host memory - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Online compiler options + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Option value converted to (void *) + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Option + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - - - - + - For dispose + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Dispose + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - For IDisposable + Copy data from device to host memory - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Max number of registers that a thread may use. - Option type: unsigned int - Applies to: compiler only + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Max number of registers that a thread may use. - Option type: unsigned int - Applies to: compiler only + Copy data from device to host memory - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - IN: Specifies minimum number of threads per block to target compilation - for - OUT: Returns the number of threads the compiler actually targeted. - This restricts the resource utilization fo the compiler (e.g. max - registers) such that a block with the given number of threads should be - able to launch based on register limitations. Note, this option does not - currently take into account any other resource limitations, such as - shared memory utilization. - Option type: unsigned int - Applies to: compiler only + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - IN: Specifies minimum number of threads per block to target compilation - for - OUT: Returns the number of threads the compiler actually targeted. - This restricts the resource utilization fo the compiler (e.g. max - registers) such that a block with the given number of threads should be - able to launch based on register limitations. Note, this option does not - currently take into account any other resource limitations, such as - shared memory utilization. - Option type: unsigned int - Applies to: compiler only + Copy data from device to host memory - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Returns the number of threads the compiler actually targeted. - This restricts the resource utilization fo the compiler (e.g. max - registers) such that a block with the given number of threads should be - able to launch based on register limitations. 
Note, this option does not - currently take into account any other resource limitations, such as - shared memory utilization. - The value is only valid after a succesful call to + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Returns a float value in the option of the wall clock time, in - milliseconds, spent creating the cubin - Option type: float - Applies to: compiler and linker + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Returns a float value in the option of the wall clock time, in - milliseconds, spent creating the cubin - Option type: float - Applies to: compiler and linker + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Returns a float value in the option of the wall clock time, in - milliseconds, spent creating the cubin - Option type: float - Applies to: compiler and linker - The value is only valid after a succesful call to + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Pointer to a buffer in which to print any log messsages from PTXAS - that are informational in nature (the buffer size is specified via - option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES) - Option type: char* - Applies to: compiler and linker - You must free the internal buffer array manually after use by calling ! + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Pointer to a buffer in which to print any log messsages from PTXAS - that are informational in nature - Option type: char* - Applies to: compiler and linker - You must free the internal buffer array manually after use by calling ! + Copy data from device to host memory - Size of the internal buffer array + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - ManagedCuda allocates an byte array as buffer and pins it in order to pass it to Cuda. - You must free the buffer manually if the buffer is not needed anymore. + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Returns the buffer converted to string. - The value is only valid after a succesful call to + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - + Copy data from device to host memory - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Pointer to a buffer in which to print any log messages from PTXAS that - reflect errors - Option type: char* - Applies to: compiler and linker - You must free the internal buffer array manually after use by calling ! + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Pointer to a buffer in which to print any log messages from PTXAS that - reflect errors - Option type: char* - Applies to: compiler and linker - You must free the internal buffer array manually after use by calling ! + Copy data from device to host memory - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - ManagedCuda allocates an byte array as buffer and pins it in order to pass it to Cuda. - You must free the buffer manually if the buffer is not needed anymore. 
+ Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Returns the buffer converted to string. - The value is only valid after a succesful call to + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - + Copy data from device to host memory - + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Level of optimizations to apply to generated code (0 - 4), with 4 - being the default and highest level of optimizations. - Option type: unsigned int - Applies to: compiler only + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Level of optimizations to apply to generated code (0 - 4), with 4 - being the default and highest level of optimizations. - Option type: unsigned int - Applies to: compiler only + Copy data from device to host memory - Level of optimizations to apply to generated code (0 - 4), with 4 - being the default and highest level of optimizations. + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - No option value required. Determines the target based on the current - attached context (default) - Option type: No option value needed - Applies to: compiler and linker + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Determines the target based on the current attached context (default) - Option type: No option value needed - Applies to: compiler and linker + Copy data from device to host memory + Destination array in host memory + Source CUdeviceptr (Pointer to device memory) - + - Target is chosen based on supplied . - Option type: unsigned int for enumerated type - Applies to: compiler and linker + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Target is chosen based on supplied ::CUjit_target_enum. - Option type: unsigned int for enumerated type ::CUjit_target_enum - Applies to: compiler and linker + Copy data from device to host memory - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Specifies choice of fallback strategy if matching cubin is not found. - Choice is based on supplied . - Option type: unsigned int for enumerated type - Applies to: compiler only + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Specifies choice of fallback strategy if matching cubin is not found. - Choice is based on supplied . 
- Option type: unsigned int for enumerated type - Applies to: compiler only + Copy data from device to host memory - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Specifies whether to create debug information in output (-g) (0: false, default) - Option type: int - Applies to: compiler and linker + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Specifies whether to create debug information in output (-g) (0: false, default) - Option type: int - Applies to: compiler and linker + Copy data from device to host memory - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Generate verbose log messages (0: false, default) - Option type: int - Applies to: compiler and linker + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Generate verbose log messages (0: false, default) - Option type: int - Applies to: compiler and linker + Copy data from device to host memory - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Generate line number information (-lineinfo) (0: false, default) - Option type: int - Applies to: compiler only + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Generate line number information (-lineinfo) (0: false, default) - Option type: int - Applies to: compiler only + Copy data from device to host memory - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Specifies whether to enable caching explicitly (-dlcm) - Choice is based on supplied . - Option type: unsigned int for enumerated type - Applies to: compiler only + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Specifies whether to enable caching explicitly (-dlcm) - Choice is based on supplied . - Option type: unsigned int for enumerated type - Applies to: compiler only + Copy data from device to host memory - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - A pending JIT linker invocation. + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Creates a pending JIT linker invocation. + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Creates a pending JIT linker invocation. + Copy data from device to host memory - Collection of linker and compiler options + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - For dispose + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Dispose - Destroys state for a JIT linker invocation. + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - For IDisposable. - Destroys state for a JIT linker invocation. + Copy data from device to host memory - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Add an input to a pending linker invocation. + Copy data from device to host memory - The input data. PTX must be NULL-terminated. - The type of the input data. - An optional name for this input in log messages. 
- Collection of linker and compiler options + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Add an input to a pending linker invocation. + Copy data from device to host memory - The input data. PTX must be NULL-terminated. - The type of the input data. - An optional name for this input in log messages. - Collection of linker and compiler options + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Add an input to a pending linker invocation. + Copy data from device to host memory - Path to the input file. - The type of the input data. - Collection of linker and compiler options + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Complete a pending linker invocation. - Completes the pending linker action and returns the cubin image for the linked - device code, which can be used with ::cuModuleLoadData. + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - A variable located in managed memory. - Type: byte + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Creates a new CudaManagedMemory and allocates the memory on host/device. + Copy data from device to host memory - In elements - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Creates a new CudaManagedMemory from definition in cu-file. + Copy data from device to host memory - The module where the variable is defined in. - The variable name as defined in the cu-file. + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Creates a new CudaManagedMemory from definition in cu-file. + Copy data from device to host memory - The kernel which module defines the variable. - The variable name as defined in the cu-file. + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - For dispose + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Dispose + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - For IDisposable + Copy data from device to host memory - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - UIntPtr to managed memory. + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - CUdeviceptr to managed memory. + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Size in bytes + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Size in elements + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Access array per element. + Copy data from device to host memory - index in elements - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Converts a managed variable to a host value. 
In case of multiple managed values (array), only the first value is converted. + Copy data from device to host memory - managed variable - newly allocated host variable with value from managed memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - The on which a pointer was allocated or registered + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - The describing the physical location of a pointer + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - The address at which a pointer's memory may be accessed on the device - Except in the exceptional disjoint addressing cases, the value returned will equal the input value. + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - The address at which a pointer's memory may be accessed on the host + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - A pair of tokens for use with the nv-p2p.h Linux kernel interface + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Synchronize every synchronous memory operation initiated on this region + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - A process-wide unique ID for an allocated memory region + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Indicates if the pointer points to managed memory + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Attach memory to a stream asynchronously - - Enqueues an operation in hStream to specify stream association of - length bytes of memory starting from dptr. This function is a - stream-ordered operation, meaning that it is dependent on, and will - only take effect when, previous work in stream has completed. Any - previous association is automatically replaced. - - dptr must point to an address within managed memory space declared - using the __managed__ keyword or allocated with cuMemAllocManaged. - - length must be zero, to indicate that the entire allocation's - stream association is being changed. Currently, it's not possible - to change stream association for a portion of an allocation. - - The stream association is specified using flags which must be - one of . - If the flag is specified, the memory can be accessed - by any stream on any device. - If the flag is specified, the program makes a guarantee - that it won't access the memory on the device from any stream. - If the flag is specified, the program makes a guarantee - that it will only access the memory on the device from hStream. It is illegal - to attach singly to the NULL stream, because the NULL stream is a virtual global - stream and not a specific stream. An error will be returned in this case. - - When memory is associated with a single stream, the Unified Memory system will - allow CPU access to this memory region so long as all operations in hStream - have completed, regardless of whether other streams are active. 
In effect, - this constrains exclusive ownership of the managed memory region by - an active GPU to per-stream activity instead of whole-GPU activity. - - Accessing memory on the device from streams that are not associated with - it will produce undefined results. No error checking is performed by the - Unified Memory system to ensure that kernels launched into other streams - do not access this region. - - It is a program's responsibility to order calls to - via events, synchronization or other means to ensure legal access to memory - at all times. Data visibility and coherency will be changed appropriately - for all kernels which follow a stream-association change. - - If hStream is destroyed while data is associated with it, the association is - removed and the association reverts to the default visibility of the allocation - as specified at cuMemAllocManaged. For __managed__ variables, the default - association is always . Note that destroying a stream is an - asynchronous operation, and as a result, the change to default association won't - happen until all work in the stream has completed. - + Copy data from device to host memory - Stream in which to enqueue the attach operation - Length of memory (must be zero) - Must be one of - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Prefetches memory to the specified destination device - Prefetches memory to the specified destination device. devPtr is the - base device pointer of the memory to be prefetched and dstDevice is the - destination device. count specifies the number of bytes to copy. hStream - is the stream in which the operation is enqueued. - - Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. - - If no physical memory has been allocated for this region, then this memory region - will be populated and mapped on the destination device. If there's insufficient - memory to prefetch the desired region, the Unified Memory driver may evict pages - belonging to other memory regions to make room. If there's no memory that can be - evicted, then the Unified Memory driver will prefetch less than what was requested. - - In the normal case, any mappings to the previous location of the migrated pages are - removed and mappings for the new location are only setup on the dstDevice. - The application can exercise finer control on these mappings using ::cudaMemAdvise. + Copy data from device to host memory - Destination device to prefetch to - Stream to enqueue prefetch operation - Note that this function is asynchronous with respect to the host and all work on other devices. + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. 
- - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. 
- - Note that this function is asynchronous with respect to the host and all work - on other devices. + Copy data from device to host memory - Pointer to memory to set the advice for - Size in bytes of the memory range - Advice to be applied for the specified memory range - Device to apply the advice for + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. 
Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. - - Note that this function is asynchronous with respect to the host and all work - on other devices. + Copy data from device to host memory - managed memory variable - Advice to be applied for the specified memory range - Device to apply the advice for + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - Enumerator class for CudaManagedMemory_byte + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - + Copy data from device to host memory - + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - + Copy data from device to host memory + Destination value in host memory + Source CUdeviceptr (Pointer to device memory) - + - + Returns the device name of the device bound to the actual context - + Device Name - + - A variable located in managed memory. - Type: uchar1 + Returns the device's compute capability of the device bound to the actual context + Device compute capability - + - Creates a new CudaManagedMemory and allocates the memory on host/device. + Retrieve device properties - In elements - + DeviceProperties - + - Creates a new CudaManagedMemory from definition in cu-file. + Returns numerical values that correspond to the least and greatest stream priorities. + Returns in leastPriority and greatestPriority the numerical values that correspond + to the least and greatest stream priorities respectively. Stream priorities + follow a convention where lower numbers imply greater priorities. The range of + meaningful stream priorities is given by [greatestPriority, leastPriority]. + If the user attempts to create a stream with a priority value that is + outside the meaningful range as specified by this API, the priority is + automatically clamped down or up to either leastPriority or greatestPriority + respectively. 
See ::cuStreamCreateWithPriority for details on creating a + priority stream. + A NULL may be passed in for leastPriority or greatestPriority if the value + is not desired. + This function will return '0' in both leastPriority and greatestPriority if + the current context's device does not support stream priorities + (see ::cuDeviceGetAttribute). - The module where the variable is defined in. - The variable name as defined in the cu-file. + Pointer to an int in which the numerical value for least + stream priority is returned + Pointer to an int in which the numerical value for greatest stream priority is returned - + - Creates a new CudaManagedMemory from definition in cu-file. + Returns the current size of limit. See - The kernel which module defines the variable. - The variable name as defined in the cu-file. + Limit to query + Returned size in bytes of limit - + - For dispose + Setting limit to value is a request by the application to update the current limit maintained by the context. The + driver is free to modify the requested value to meet h/w requirements (this could be clamping to minimum or maximum + values, rounding up to nearest element size, etc). The application can use to find out exactly what + the limit has been set to. + Setting each has its own specific restrictions, so each is discussed here: + + ValueRestriction + + controls the stack size of each GPU thread. This limit is only applicable to devices + of compute capability 2.0 and higher. Attempting to set this limit on devices of compute capability less than 2.0 + will result in the error being returned. + + + controls the size of the FIFO used by the printf() device system call. Setting + must be performed before loading any module that uses the printf() device + system call, otherwise will be returned. This limit is only applicable to + devices of compute capability 2.0 and higher. Attempting to set this limit on devices of compute capability less + than 2.0 will result in the error being returned. + + + controls the size in bytes of the heap used by the ::malloc() and ::free() device system calls. Setting + must be performed before launching any kernel that uses the ::malloc() or ::free() device system calls, otherwise + will be returned. This limit is only applicable to + devices of compute capability 2.0 and higher. Attempting to set this limit on devices of compute capability less + than 2.0 will result in the error being returned. + + + controls the maximum nesting depth of a grid at which a thread can safely call ::cudaDeviceSynchronize(). Setting + this limit must be performed before any launch of a kernel that uses the + device runtime and calls ::cudaDeviceSynchronize() above the default sync + depth, two levels of grids. Calls to ::cudaDeviceSynchronize() will fail + with error code ::cudaErrorSyncDepthExceeded if the limitation is + violated. This limit can be set smaller than the default or up the maximum + launch depth of 24. When setting this limit, keep in mind that additional + levels of sync depth require the driver to reserve large amounts of device + memory which can no longer be used for user allocations. If these + reservations of device memory fail, ::cuCtxSetLimit will return + , and the limit can be reset to a lower value. + This limit is only applicable to devices of compute capability 3.5 and + higher. Attempting to set this limit on devices of compute capability less + than 3.5 will result in the error being + returned. 
+ + + controls the maximum number of + outstanding device runtime launches that can be made from the current + context. A grid is outstanding from the point of launch up until the grid + is known to have been completed. Device runtime launches which violate + this limitation fail and return ::cudaErrorLaunchPendingCountExceeded when + ::cudaGetLastError() is called after launch. If more pending launches than + the default (2048 launches) are needed for a module using the device + runtime, this limit can be increased. Keep in mind that being able to + sustain additional pending launches will require the driver to reserve + larger amounts of device memory upfront which can no longer be used for + allocations. If these reservations fail, ::cuCtxSetLimit will return + , and the limit can be reset to a lower value. + This limit is only applicable to devices of compute capability 3.5 and + higher. Attempting to set this limit on devices of compute capability less + than 3.5 will result in the error being + returned. + + + Limit to set + Size in bytes of limit - + - Dispose + As the normal context constructor has the same arguments, the OpenGL-constructor is private with inverse arguement order. + It has to be called from a static method. + Create a new instance of managed CUDA for a OpenGL-device. + OpenGL resources from this device may be registered and mapped through the lifetime of this CUDA context. + CUdevice to map this context to. + Context creation flags - + - For IDisposable + Gets the CUDA devices associated with the current OpenGL context - + SLI parameter + - + - UIntPtr to managed memory. + Returns a list of possible CUDA devices to use for a given DirectX device + DirectX device + SLI parameter + DirectX version of the directX device + - + - CUdeviceptr to managed memory. + Returns the Direct3D device against which the CUDA context, bound to the calling thread, + was created. + + - + - Size in bytes + Returns the device name of the device with ID deviceID + + Device Name - + - Size in elements + Returns the device's compute capability of the device with ID deviceID + + Device compute capability - + - Access array per element. + Returns the version number of the installed cuda driver - index in elements - + CUDA driver version - + - If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + Retrieve device properties + Device ID + DeviceProperties - + - Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. + Get the number of CUDA capable devices - managed variable - newly allocated host variable with value from managed memory + - + - The on which a pointer was allocated or registered + If both the current context (current to the calling thread) and peerContext are on devices which support unified + addressing (as may be queried using GetDeviceInfo), then + on success all allocations from peerContext will immediately be accessible + by the current context. See \ref CUDA_UNIFIED for additional + details. + Note that access granted by this call is unidirectional and that in order to access + memory from the current context in peerContext, a separate symmetric call + to ::cuCtxEnablePeerAccess() is required. + Returns if indicates + that the CUdevice of the current context cannot directly access memory + from the CUdevice of peerContext. + Throws if direct access of + peerContext from the current context has already been enabled. 
+ Throws if there is no current context, peerContext + is not a valid context, or if the current context is peerContext. + Peer context to enable direct access to from the current context + - + - The describing the physical location of a pointer + Disables direct access to memory allocations in a peer context and unregisters any registered allocations. + Peer context to disable direct access to + - + - The address at which a pointer's memory may be accessed on the device - Except in the exceptional disjoint addressing cases, the value returned will equal the input value. + Fills the CudaDeviceProperties structure - + - The address at which a pointer's memory may be accessed on the host + Gets the CUdevice for a given device ordinal number + + - + - A pair of tokens for use with the nv-p2p.h Linux kernel interface + Initialize the profiling. + Using this API user can initialize the CUDA profiler by specifying + the configuration file, output file and output file format. This + API is generally used to profile different set of counters by + looping the kernel launch. The configFile parameter can be used + to select profiling options including profiler counters. Refer to + the "Compute Command Line Profiler User Guide" for supported + profiler options and counters. + Limitation: The CUDA profiler cannot be initialized with this API + if another profiling tool is already active, as indicated by the + exception . + Name of the config file that lists the counters/options for profiling. + Name of the outputFile where the profiling results will be stored. + outputMode - + - Synchronize every synchronous memory operation initiated on this region + Enable profiling. + Enables profile collection by the active profiling tool for the + current context. If profiling is already enabled, then + cuProfilerStart() has no effect. + cuProfilerStart and cuProfilerStop APIs are used to + programmatically control the profiling granularity by allowing + profiling to be done only on selective pieces of code. - + - A process-wide unique ID for an allocated memory region + Disables profile collection by the active profiling tool for the + current context. If profiling is already disabled, then + cuProfilerStop() has no effect. + cuProfilerStart and cuProfilerStop APIs are used to + programmatically control the profiling granularity by allowing + profiling to be done only on selective pieces of code. - + - Indicates if the pointer points to managed memory + Resets all persisting lines in cache to normal status. + CtxResetPersistingL2Cache Resets all persisting lines in cache to normal + status. Takes effect on function return. - + - Attach memory to a stream asynchronously - - Enqueues an operation in hStream to specify stream association of - length bytes of memory starting from dptr. This function is a - stream-ordered operation, meaning that it is dependent on, and will - only take effect when, previous work in stream has completed. Any - previous association is automatically replaced. - - dptr must point to an address within managed memory space declared - using the __managed__ keyword or allocated with cuMemAllocManaged. - - length must be zero, to indicate that the entire allocation's - stream association is being changed. Currently, it's not possible - to change stream association for a portion of an allocation. - - The stream association is specified using flags which must be - one of . - If the flag is specified, the memory can be accessed - by any stream on any device. 
- If the flag is specified, the program makes a guarantee - that it won't access the memory on the device from any stream. - If the flag is specified, the program makes a guarantee - that it will only access the memory on the device from hStream. It is illegal - to attach singly to the NULL stream, because the NULL stream is a virtual global - stream and not a specific stream. An error will be returned in this case. - - When memory is associated with a single stream, the Unified Memory system will - allow CPU access to this memory region so long as all operations in hStream - have completed, regardless of whether other streams are active. In effect, - this constrains exclusive ownership of the managed memory region by - an active GPU to per-stream activity instead of whole-GPU activity. - - Accessing memory on the device from streams that are not associated with - it will produce undefined results. No error checking is performed by the - Unified Memory system to ensure that kernels launched into other streams - do not access this region. - - It is a program's responsibility to order calls to - via events, synchronization or other means to ensure legal access to memory - at all times. Data visibility and coherency will be changed appropriately - for all kernels which follow a stream-association change. - - If hStream is destroyed while data is associated with it, the association is - removed and the association reverts to the default visibility of the allocation - as specified at cuMemAllocManaged. For __managed__ variables, the default - association is always . Note that destroying a stream is an - asynchronous operation, and as a result, the change to default association won't - happen until all work in the stream has completed. - + Returns the execution affinity setting for the current context. - Stream in which to enqueue the attach operation - Length of memory (must be zero) - Must be one of - - + - Prefetches memory to the specified destination device - Prefetches memory to the specified destination device. devPtr is the - base device pointer of the memory to be prefetched and dstDevice is the - destination device. count specifies the number of bytes to copy. hStream - is the stream in which the operation is enqueued. - - Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. - - If no physical memory has been allocated for this region, then this memory region - will be populated and mapped on the destination device. If there's insufficient - memory to prefetch the desired region, the Unified Memory driver may evict pages - belonging to other memory regions to make room. If there's no memory that can be - evicted, then the Unified Memory driver will prefetch less than what was requested. - - In the normal case, any mappings to the previous location of the migrated pages are - removed and mappings for the new location are only setup on the dstDevice. - The application can exercise finer control on these mappings using ::cudaMemAdvise. + Returns the maximum number of elements allocatable in a 1D linear texture for a given texture element size. + Returns in \p maxWidthInElements the maximum number of texture elements allocatable in a 1D linear texture + for given \p format and \p numChannels. - Destination device to prefetch to - Stream to enqueue prefetch operation - Note that this function is asynchronous with respect to the host and all work on other devices. + Texture format. + Number of channels per texture element. 
+ + Returned maximum number of texture elements allocatable for given \p format and \p numChannels. - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. 
- Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. - - Note that this function is asynchronous with respect to the host and all work - on other devices. + Gets the Cuda context bound to this managed Cuda object - Pointer to memory to set the advice for - Size in bytes of the memory range - Advice to be applied for the specified memory range - Device to apply the advice for - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. 
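The managed-memory members documented in this file (allocation, AttachAsync, PrefetchAsync and the cuMemAdvise-style advice call) combine into a fairly standard unified-memory workflow, sketched below in C#. The CudaManagedMemory_float type name (by analogy with the uchar variants documented here), the AttachAsync/PrefetchAsync/Advise signatures, the ctx.Device property and the CUmemAttach_flags/CUmemAdvise enum members are all assumptions for illustration.

using ManagedCuda;
using ManagedCuda.BasicTypes;

class UnifiedMemoryDemo
{
    static void Main()
    {
        var ctx = new CudaContext(0);
        var stream = new CudaStream();

        // Allocate 1024 elements of unified (managed) memory; flag name assumed.
        var data = new CudaManagedMemory_float(1024, CUmemAttach_flags.Global);

        // Associate the whole allocation with this stream, then prefetch it to the
        // GPU before the kernels that consume it are enqueued (length must be zero).
        data.AttachAsync(stream.Stream, 0, CUmemAttach_flags.Single);   // signature assumed
        data.PrefetchAsync(ctx.Device, stream.Stream);                  // destination device, stream; names assumed

        // Hint that the region is mostly read after initialization (cuMemAdvise).
        data.Advise(CUmemAdvise.SetReadMostly, ctx.Device);             // method/enum names assumed

        // ... enqueue kernels on 'stream' that read 'data' ...

        stream.Synchronize();
    }
}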
But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. - - Note that this function is asynchronous with respect to the host and all work - on other devices. + Gets the Cuda device allocated to the Cuda Context - managed memory variable - Advice to be applied for the specified memory range - Device to apply the advice for - + - Enumerator class for CudaManagedMemory_uchar1 + Gets the Id of the Cuda device. - + - + Indicates if the CudaContext instance created the wrapped cuda context (return = true) or if the CudaContext instance was bound to an existing cuda context. - - + - + Gets the Id of the Cuda device. - + - + Number of channels in array - + - + One channel, e.g. float1, int1, float, int - + - + Two channels, e.g. float2, int2 - - + - A variable located in managed memory. - Type: uchar2 + Four channels, e.g. float4, int4 - + - Creates a new CudaManagedMemory and allocates the memory on host/device. + An one dimensional CUDA array - In elements - - + - Creates a new CudaManagedMemory from definition in cu-file. + Creates a new CUDA array. - The module where the variable is defined in. - The variable name as defined in the cu-file. + + + - + - Creates a new CudaManagedMemory from definition in cu-file. + Creates a new CUDA array from an existing CUarray. + The CUarray won't be destroyed when disposing. 
+ Array properties are obtained by cuArrayGetDescriptor - The kernel which module defines the variable. - The variable name as defined in the cu-file. + - + + + Creates a new CUDA array from an existing CUarray. + Array properties are obtained by cuArrayGetDescriptor + + + The cuArray will be destroyed while disposing, if the CudaArray is the owner + + For dispose - + Dispose - + For IDisposable - + - UIntPtr to managed memory. + Copy data from host to array memory + T must be of value type, i.e. a struct + source pointer to host memory + Offset in bytes of destination array - + - CUdeviceptr to managed memory. + Copy data from host to array memory + T must be of value type, i.e. a struct + source pointer to host memory + Offset in bytes of destination array - + - Size in bytes + Copy data from host to array memory + Pointer to source data + Number of bytes to copy + Offset in bytes of destination array - + - Size in elements + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Access array per element. + Copy data from host to array memory - index in elements - + Offset in bytes of destination array + source array - + - If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. + Copy data from host to array memory - managed variable - newly allocated host variable with value from managed memory + Offset in bytes of destination array + source array - + - The on which a pointer was allocated or registered + Copy data from host to array memory + Offset in bytes of destination array + source array - + - The describing the physical location of a pointer + Copy data from host to array memory + Offset in bytes of destination array + source array - + - The address at which a pointer's memory may be accessed on the device - Except in the exceptional disjoint addressing cases, the value returned will equal the input value. + Copy data from host to array memory + Offset in bytes of destination array + source array - + - The address at which a pointer's memory may be accessed on the host + Copy data from host to array memory + Offset in bytes of destination array + source array - + - A pair of tokens for use with the nv-p2p.h Linux kernel interface + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Synchronize every synchronous memory operation initiated on this region + Copy data from host to array memory + Offset in bytes of destination array + source array - + - A process-wide unique ID for an allocated memory region + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Indicates if the pointer points to managed memory + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Attach memory to a stream asynchronously - - Enqueues an operation in hStream to specify stream association of - length bytes of memory starting from dptr. This function is a - stream-ordered operation, meaning that it is dependent on, and will - only take effect when, previous work in stream has completed. Any - previous association is automatically replaced. 
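The one-dimensional array type and the copy helpers documented above reduce to a simple round trip in host code. A short sketch follows; the constructor argument order, the CudaArray1DNumChannels enum name and the exact CopyFromHostToThis/CopyFromThisToHost overloads (assumed here to take a source/destination array plus a byte offset) are inferred from the summaries in this diff and should be treated as assumptions.

using ManagedCuda;
using ManagedCuda.BasicTypes;

class Array1DCopyDemo
{
    static void Main()
    {
        using (var ctx = new CudaContext(0))
        {
            var host = new float[256];
            for (int i = 0; i < host.Length; i++) host[i] = i;

            // One-dimensional CUDA array: element format, size in elements, channels per element.
            using (var array = new CudaArray1D(CUArrayFormat.Float, 256, CudaArray1DNumChannels.One))
            {
                array.CopyFromHostToThis(host, 0);      // host -> array, offset 0 bytes

                var roundTrip = new float[256];
                array.CopyFromThisToHost(roundTrip, 0); // array -> host, offset 0 bytes
            }
        }
    }
}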
- - dptr must point to an address within managed memory space declared - using the __managed__ keyword or allocated with cuMemAllocManaged. - - length must be zero, to indicate that the entire allocation's - stream association is being changed. Currently, it's not possible - to change stream association for a portion of an allocation. - - The stream association is specified using flags which must be - one of . - If the flag is specified, the memory can be accessed - by any stream on any device. - If the flag is specified, the program makes a guarantee - that it won't access the memory on the device from any stream. - If the flag is specified, the program makes a guarantee - that it will only access the memory on the device from hStream. It is illegal - to attach singly to the NULL stream, because the NULL stream is a virtual global - stream and not a specific stream. An error will be returned in this case. - - When memory is associated with a single stream, the Unified Memory system will - allow CPU access to this memory region so long as all operations in hStream - have completed, regardless of whether other streams are active. In effect, - this constrains exclusive ownership of the managed memory region by - an active GPU to per-stream activity instead of whole-GPU activity. - - Accessing memory on the device from streams that are not associated with - it will produce undefined results. No error checking is performed by the - Unified Memory system to ensure that kernels launched into other streams - do not access this region. - - It is a program's responsibility to order calls to - via events, synchronization or other means to ensure legal access to memory - at all times. Data visibility and coherency will be changed appropriately - for all kernels which follow a stream-association change. - - If hStream is destroyed while data is associated with it, the association is - removed and the association reverts to the default visibility of the allocation - as specified at cuMemAllocManaged. For __managed__ variables, the default - association is always . Note that destroying a stream is an - asynchronous operation, and as a result, the change to default association won't - happen until all work in the stream has completed. - + Copy data from host to array memory - Stream in which to enqueue the attach operation - Length of memory (must be zero) - Must be one of - + Offset in bytes of destination array + source array - + - Prefetches memory to the specified destination device - Prefetches memory to the specified destination device. devPtr is the - base device pointer of the memory to be prefetched and dstDevice is the - destination device. count specifies the number of bytes to copy. hStream - is the stream in which the operation is enqueued. - - Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. - - If no physical memory has been allocated for this region, then this memory region - will be populated and mapped on the destination device. If there's insufficient - memory to prefetch the desired region, the Unified Memory driver may evict pages - belonging to other memory regions to make room. If there's no memory that can be - evicted, then the Unified Memory driver will prefetch less than what was requested. - - In the normal case, any mappings to the previous location of the migrated pages are - removed and mappings for the new location are only setup on the dstDevice. - The application can exercise finer control on these mappings using ::cudaMemAdvise. 
+ Copy data from host to array memory - Destination device to prefetch to - Stream to enqueue prefetch operation - Note that this function is asynchronous with respect to the host and all work on other devices. + Offset in bytes of destination array + source array - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. 
- This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. - - Note that this function is asynchronous with respect to the host and all work - on other devices. + Copy data from host to array memory - Pointer to memory to set the advice for - Size in bytes of the memory range - Advice to be applied for the specified memory range - Device to apply the advice for + Offset in bytes of destination array + source array - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. 
Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. - - Note that this function is asynchronous with respect to the host and all work - on other devices. + Copy data from host to array memory - managed memory variable - Advice to be applied for the specified memory range - Device to apply the advice for + Offset in bytes of destination array + source array - + - Enumerator class for CudaManagedMemory_uchar2 + Copy data from host to array memory + Offset in bytes of destination array + source array - + - + Copy data from host to array memory - + Offset in bytes of destination array + source array - + - + Copy data from host to array memory + Offset in bytes of destination array + source array - + - + Copy data from host to array memory + Offset in bytes of destination array + source array - + - + Copy data from host to array memory + Offset in bytes of destination array + source array - + - + Copy data from host to array memory - + Offset in bytes of destination array + source array - + - A variable located in managed memory. - Type: uchar3 + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Creates a new CudaManagedMemory and allocates the memory on host/device. 
+ Copy data from host to array memory - In elements - + Offset in bytes of destination array + source array - + - Creates a new CudaManagedMemory from definition in cu-file. + Copy data from host to array memory - The module where the variable is defined in. - The variable name as defined in the cu-file. + Offset in bytes of destination array + source array - + - Creates a new CudaManagedMemory from definition in cu-file. + Copy data from host to array memory - The kernel which module defines the variable. - The variable name as defined in the cu-file. + Offset in bytes of destination array + source array - + - For dispose + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Dispose + Copy data from host to array memory + Offset in bytes of destination array + source array - + - For IDisposable + Copy data from host to array memory - + Offset in bytes of destination array + source array - + - UIntPtr to managed memory. + Copy data from host to array memory + Offset in bytes of destination array + source array - + - CUdeviceptr to managed memory. + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Size in bytes + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Size in elements + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Access array per element. + Copy data from host to array memory - index in elements - + Offset in bytes of destination array + source array - + - If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. + Copy data from host to array memory - managed variable - newly allocated host variable with value from managed memory + Offset in bytes of destination array + source array - + - The on which a pointer was allocated or registered + Copy data from host to array memory + Offset in bytes of destination array + source array - + - The describing the physical location of a pointer + Copy data from host to array memory + Offset in bytes of destination array + source array - + - The address at which a pointer's memory may be accessed on the device - Except in the exceptional disjoint addressing cases, the value returned will equal the input value. 
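Besides plain allocation, the summaries above describe constructors that bind a wrapper to a __managed__ variable already defined in a compiled module ("from definition in cu-file"). A hypothetical sketch of that usage follows; the LoadModule call, the (module, name) constructor overload and the uchar1 field layout are assumptions for illustration only.

using ManagedCuda;
using ManagedCuda.BasicTypes;
using ManagedCuda.VectorTypes;

class ManagedVariableDemo
{
    static void Main()
    {
        var ctx = new CudaContext(0);
        CUmodule module = ctx.LoadModule("kernels.ptx");   // module containing: __managed__ uchar1 counter;

        // Wrap the module-level __managed__ variable; the existing allocation is reused.
        var counter = new CudaManagedMemory_uchar1(module, "counter");

        counter[0] = new uchar1 { x = 0 };                  // host-side write through the indexer
        ctx.Synchronize();
    }
}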
+ Copy data from host to array memory + Offset in bytes of destination array + source array - + - The address at which a pointer's memory may be accessed on the host + Copy data from host to array memory + Offset in bytes of destination array + source array - + - A pair of tokens for use with the nv-p2p.h Linux kernel interface + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Synchronize every synchronous memory operation initiated on this region + Copy data from host to array memory + Offset in bytes of destination array + source array - + - A process-wide unique ID for an allocated memory region + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Indicates if the pointer points to managed memory + Copy data from host to array memory + Offset in bytes of destination array + source array - + - Attach memory to a stream asynchronously - - Enqueues an operation in hStream to specify stream association of - length bytes of memory starting from dptr. This function is a - stream-ordered operation, meaning that it is dependent on, and will - only take effect when, previous work in stream has completed. Any - previous association is automatically replaced. - - dptr must point to an address within managed memory space declared - using the __managed__ keyword or allocated with cuMemAllocManaged. - - length must be zero, to indicate that the entire allocation's - stream association is being changed. Currently, it's not possible - to change stream association for a portion of an allocation. - - The stream association is specified using flags which must be - one of . - If the flag is specified, the memory can be accessed - by any stream on any device. - If the flag is specified, the program makes a guarantee - that it won't access the memory on the device from any stream. - If the flag is specified, the program makes a guarantee - that it will only access the memory on the device from hStream. It is illegal - to attach singly to the NULL stream, because the NULL stream is a virtual global - stream and not a specific stream. An error will be returned in this case. - - When memory is associated with a single stream, the Unified Memory system will - allow CPU access to this memory region so long as all operations in hStream - have completed, regardless of whether other streams are active. In effect, - this constrains exclusive ownership of the managed memory region by - an active GPU to per-stream activity instead of whole-GPU activity. - - Accessing memory on the device from streams that are not associated with - it will produce undefined results. No error checking is performed by the - Unified Memory system to ensure that kernels launched into other streams - do not access this region. - - It is a program's responsibility to order calls to - via events, synchronization or other means to ensure legal access to memory - at all times. Data visibility and coherency will be changed appropriately - for all kernels which follow a stream-association change. - - If hStream is destroyed while data is associated with it, the association is - removed and the association reverts to the default visibility of the allocation - as specified at cuMemAllocManaged. For __managed__ variables, the default - association is always . Note that destroying a stream is an - asynchronous operation, and as a result, the change to default association won't - happen until all work in the stream has completed. 
- + Copy data from host to array memory - Stream in which to enqueue the attach operation - Length of memory (must be zero) - Must be one of - + Offset in bytes of destination array + source array - + - Prefetches memory to the specified destination device - Prefetches memory to the specified destination device. devPtr is the - base device pointer of the memory to be prefetched and dstDevice is the - destination device. count specifies the number of bytes to copy. hStream - is the stream in which the operation is enqueued. - - Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. - - If no physical memory has been allocated for this region, then this memory region - will be populated and mapped on the destination device. If there's insufficient - memory to prefetch the desired region, the Unified Memory driver may evict pages - belonging to other memory regions to make room. If there's no memory that can be - evicted, then the Unified Memory driver will prefetch less than what was requested. - - In the normal case, any mappings to the previous location of the migrated pages are - removed and mappings for the new location are only setup on the dstDevice. - The application can exercise finer control on these mappings using ::cudaMemAdvise. + Copy data from host to array memory - Destination device to prefetch to - Stream to enqueue prefetch operation - Note that this function is asynchronous with respect to the host and all work on other devices. + Offset in bytes of destination array + source array - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. 
- Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. - - Note that this function is asynchronous with respect to the host and all work - on other devices. + Copy data from host to array memory - Pointer to memory to set the advice for - Size in bytes of the memory range - Advice to be applied for the specified memory range - Device to apply the advice for + Offset in bytes of destination array + source array - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. 
When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. 
- - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. - - Note that this function is asynchronous with respect to the host and all work - on other devices. + Copy data from host to array memory - managed memory variable - Advice to be applied for the specified memory range - Device to apply the advice for + Offset in bytes of destination array + source array - + - Enumerator class for CudaManagedMemory_uchar3 + Copy data from host to array memory + Offset in bytes of destination array + source array - + - + Copy data from host to array memory - + Offset in bytes of destination array + source array - + - + Copy data from host to array memory + Offset in bytes of destination array + source array - + - + Copy data from host to array memory + Offset in bytes of destination array + source array - + - + Copy data from host to array memory + Offset in bytes of destination array + source array - + - + Copy data from array to host memory - + T must be of value type, i.e. a struct + Destination pointer to host memory + Offset in bytes of destination array - + - A variable located in managed memory. - Type: uchar4 + Copy data from array to host memory + T must be of value type, i.e. a struct + Destination pointer to host memory + Offset in bytes of destination array - + - Creates a new CudaManagedMemory and allocates the memory on host/device. + Copy data from array to host memory - In elements - + Pointer to Destination data + Number of bytes to copy + Offset in bytes of destination array - + - Creates a new CudaManagedMemory from definition in cu-file. + Copy data from array to host memory - The module where the variable is defined in. - The variable name as defined in the cu-file. + Offset in bytes of destination array + Destination array - + - Creates a new CudaManagedMemory from definition in cu-file. + Copy data from array to host memory - The kernel which module defines the variable. - The variable name as defined in the cu-file. + Offset in bytes of destination array + Destination array - + - For dispose + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - Dispose + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - For IDisposable + Copy data from array to host memory - + Offset in bytes of destination array + Destination array - + - UIntPtr to managed memory. + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - CUdeviceptr to managed memory. + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - Size in bytes + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - Size in elements + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - Access array per element. + Copy data from array to host memory - index in elements - + Offset in bytes of destination array + Destination array - + - If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - Converts a managed variable to a host value. 
In case of multiple managed values (array), only the first value is converted. + Copy data from array to host memory - managed variable - newly allocated host variable with value from managed memory + Offset in bytes of destination array + Destination array - + - The on which a pointer was allocated or registered + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - The describing the physical location of a pointer + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - The address at which a pointer's memory may be accessed on the device - Except in the exceptional disjoint addressing cases, the value returned will equal the input value. + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - The address at which a pointer's memory may be accessed on the host + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - A pair of tokens for use with the nv-p2p.h Linux kernel interface + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - Synchronize every synchronous memory operation initiated on this region + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - A process-wide unique ID for an allocated memory region + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - Indicates if the pointer points to managed memory + Copy data from array to host memory + Offset in bytes of destination array + Destination array - + - Attach memory to a stream asynchronously - - Enqueues an operation in hStream to specify stream association of - length bytes of memory starting from dptr. This function is a - stream-ordered operation, meaning that it is dependent on, and will - only take effect when, previous work in stream has completed. Any - previous association is automatically replaced. - - dptr must point to an address within managed memory space declared - using the __managed__ keyword or allocated with cuMemAllocManaged. - - length must be zero, to indicate that the entire allocation's - stream association is being changed. Currently, it's not possible - to change stream association for a portion of an allocation. - - The stream association is specified using flags which must be - one of . - If the flag is specified, the memory can be accessed - by any stream on any device. - If the flag is specified, the program makes a guarantee - that it won't access the memory on the device from any stream. - If the flag is specified, the program makes a guarantee - that it will only access the memory on the device from hStream. It is illegal - to attach singly to the NULL stream, because the NULL stream is a virtual global - stream and not a specific stream. An error will be returned in this case. - - When memory is associated with a single stream, the Unified Memory system will - allow CPU access to this memory region so long as all operations in hStream - have completed, regardless of whether other streams are active. In effect, - this constrains exclusive ownership of the managed memory region by - an active GPU to per-stream activity instead of whole-GPU activity. - - Accessing memory on the device from streams that are not associated with - it will produce undefined results. 
-            [removed: CudaManagedMemory_uchar4 docs. The tail of the stream-attach remarks (no cross-stream error checking; the program must order accesses via events or synchronization; when the stream is destroyed the association reverts to the default visibility from cuMemAllocManaged), the AttachAsync parameters (stream, length which must be zero, attach flag), the PrefetchAsync remarks and parameters (destination device, stream), both MemAdvise overloads with the full SET/UNSET_READ_MOSTLY, SET/UNSET_PREFERRED_LOCATION and SET/UNSET_ACCESSED_BY advice descriptions and parameters, and the CudaManagedMemory_uchar4 enumerator class.]
-            [removed: start of the CudaManagedMemory_sbyte docs. Class summary ("A variable located in managed memory. Type: sbyte"), the three constructors (allocate by element count, or bind to a __managed__ variable via a module or kernel plus variable name), the dispose members, and the UIntPtr / CUdeviceptr accessors.]
+            [added: a run of "Copy data from array to host memory" overload docs, each with the parameters "Offset in bytes of destination array" and "Destination array".]
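The removed text above is the ManagedCuda XML documentation for the unified-memory hint APIs exposed by the per-type managed-memory wrappers. As a rough illustration of how those hints are normally used, here is a minimal C# sketch; the member and enum names (PrefetchAsync, MemAdvise, CUmemAdvise, CUmemAttach_flags) are taken from the removed docs and are assumptions to be checked against the ManagedCuda version this project actually references.

// Sketch only: member and enum names follow the XML docs removed in this diff;
// verify them against the referenced ManagedCuda version.
using ManagedCuda;
using ManagedCuda.BasicTypes;

static class UnifiedMemoryHintsSketch
{
    public static void Run()
    {
        var ctx = new CudaContext(0);
        var stream = new CudaStream();

        // Managed (unified) memory, visible to both host and device.
        var data = new CudaManagedMemory_float(1 << 20, CUmemAttach_flags.Global);

        // Hint that the range is mostly read, then migrate it to the GPU
        // before the kernels that consume it are launched.
        data.MemAdvise(CUmemAdvise.SetReadMostly, ctx.Device);   // 'ctx.Device' is assumed here
        data.PrefetchAsync(ctx.Device, stream.Stream);

        // ... launch kernels on 'stream' that read 'data' ...

        stream.Synchronize();
        data.Dispose();
        stream.Dispose();
        ctx.Dispose();
    }
}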
+            [added: more "Copy data from array to host memory" overloads (one per element type), "Copy data from array to array" overloads (destination array, source array, size of memory copy in bytes, destination/source offsets in bytes), "Copy data from array to device" and "Copy data from device to array" (device pointer, number of bytes, source offset), array properties (width in elements, width in bytes, wrapped CUarray, wrapped CUDAArrayDescriptor, handle-ownership note), the "Number of channels in array" values (One / Two / Four channels), and the CudaArray2D class summary ("A two dimensional CUDA array") with its "Creates a new CUDA array." constructor summary.]
-            [removed: remainder of the CudaManagedMemory_sbyte docs. Size in bytes and in elements, the per-element indexer, the handle-ownership note, the ToHost conversion (only the first value of an array is converted), the pointer-attribute properties (context, memory type, device/host address, nv-p2p tokens, sync-memops flag, buffer ID, is-managed flag), the full AttachAsync / PrefetchAsync / MemAdvise remarks and parameters, and the CudaManagedMemory_sbyte enumerator class.]
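The stream-attach remarks removed in this hunk describe how a managed allocation can be bound to a single stream so the CPU can touch it while other streams stay busy. A small hedged sketch of that pattern, again using member names as documented rather than verified signatures:

// Sketch only: AttachAsync and the attach flags follow the removed XML docs;
// exact ManagedCuda signatures may differ.
using ManagedCuda;
using ManagedCuda.BasicTypes;

static class StreamAttachSketch
{
    public static void Run(CudaContext ctx)
    {
        var stream = new CudaStream();
        var buffer = new CudaManagedMemory_float(4096, CUmemAttach_flags.Global);

        // Associate the whole allocation with one stream: length must be zero,
        // and attaching 'Single' to the NULL stream is illegal per the docs above.
        buffer.AttachAsync(stream.Stream, 0, CUmemAttach_flags.Single);

        // Once everything queued in 'stream' has completed, the CPU may touch
        // the buffer even while other streams keep the GPU busy.
        stream.Synchronize();
        buffer[0] = 1.0f;   // per-element host access via the documented indexer
    }
}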
+            [added: CudaArray2D member docs. Constructors (new array with width and height in elements; wrap an existing CUarray with or without taking ownership, properties obtained via cuArrayGetDescriptor), dispose members, raw aligned and unaligned copy methods (2D copy parameters), CopyFromHost / CopyToHost (IntPtr and generic host-array overloads), copies from and to a pitched device variable, array-to-array copies, the wrapped CUarray and CUDAArrayDescriptor, Height, width in elements and in bytes, the handle-ownership note, and the channel-count values (One / Two / Four).]
-            [removed: CudaManagedMemory_char1 docs. Class summary, constructors, dispose members, UIntPtr / CUdeviceptr accessors, size, per-element indexer, ToHost conversion, pointer-attribute properties, the full AttachAsync / PrefetchAsync / MemAdvise remarks and parameters, and the enumerator class; plus the start of the CudaManagedMemory_char2 docs (class summary, constructors, dispose members, UIntPtr / CUdeviceptr accessors).]
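The added CudaArray2D documentation describes plain host round-trip copies. A short sketch, assuming the copy helpers are named CopyFromHostToThis / CopyFromThisToHost and the channel-count enum matches the One / Two / Four values documented above:

// Sketch of the CudaArray2D usage documented above. CopyFromHostToThis,
// CopyFromThisToHost and CudaArray2DNumChannels are assumed names; only the
// behaviour follows the added docs.
using ManagedCuda;
using ManagedCuda.BasicTypes;

static class Array2DSketch
{
    public static void Run()
    {
        const int width = 256, height = 128;
        var host = new float[width * height];

        // One float channel per element.
        var array2d = new CudaArray2D(CUArrayFormat.Float, width, height,
                                      CudaArray2DNumChannels.One);

        array2d.CopyFromHostToThis(host);        // host -> array
        var roundTrip = new float[width * height];
        array2d.CopyFromThisToHost(roundTrip);   // array -> host

        array2d.Dispose();
    }
}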
+            [added: CudaArray3D docs. Class summary ("A three dimensional CUDA array"), constructors (width, height and depth in elements; wrap an existing CUarray with or without ownership, properties obtained via cuArrayGetDescriptor), dispose members, raw 3D copy method, CopyFromHost / CopyToHost, copies from and to pitched device variables (optionally with an explicit pitch in bytes), array-to-array copies, GetSparseProperties (layout of arrays allocated with CUDA_ARRAY3D_SPARSE, including miptail size and first level), GetPlane (per-plane array for multi-planar formats such as CU_AD_FORMAT_NV12), the wrapped CUarray and CUDAArray3DDescriptor, Depth / Height / width properties, and the handle-ownership note.]
+            [added: start of the CudaEvent docs. Class summary ("Wraps a CUevent handle"), constructors (default, with CUEventFlags, and with event-creation parameters), dispose members, the wrapped CUevent handle, and the beginning of the Record remarks.]
-            [removed: remainder of the CudaManagedMemory_char2 docs. Size, per-element indexer, handle-ownership note, ToHost conversion, pointer-attribute properties, the full AttachAsync / PrefetchAsync / MemAdvise remarks and parameters, and the CudaManagedMemory_char2 enumerator class.]
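The CudaEvent documentation introduced here is the usual record / synchronize / elapsed-time pattern. A compact sketch, assuming the Record, Synchronize and static ElapsedTime members exist as documented:

// Sketch of basic event timing with the CudaEvent wrapper documented above.
// Record / Synchronize / ElapsedTime follow the added docs; overloads may vary.
using System;
using ManagedCuda;

static class EventTimingSketch
{
    public static void Run(CudaStream stream)
    {
        var start = new CudaEvent();
        var stop = new CudaEvent();

        start.Record(stream.Stream);
        // ... enqueue kernels or async copies on 'stream' here ...
        stop.Record(stream.Stream);

        // Block until 'stop' has actually been recorded, then read the elapsed
        // time in milliseconds (roughly 0.5 microsecond resolution per the docs).
        stop.Synchronize();
        float ms = CudaEvent.ElapsedTime(start, stop);
        Console.WriteLine($"GPU section took {ms:F3} ms");

        start.Dispose();
        stop.Dispose();
    }
}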
If stream is non-zero, the event is recorded after all preceding operations in the stream have been + completed; otherwise, it is recorded after all preceding operations in the CUDA context have been completed. Since + operation is asynchronous, and/or must be used to determine when the event + has actually been recorded. + If has previously been called and the event has not been recorded yet, this function throws + . + - + - + Records an event + Captures in \p hEvent the contents of \p hStream at the time of this call. + \p hEvent and \p hStream must be from the same context. + Calls such as ::cuEventQuery() or ::cuStreamWaitEvent() will then + examine or wait for completion of the work that was captured.Uses of + \p hStream after this call do not modify \p hEvent. See note on default + stream behavior for what is captured in the default case. + ::cuEventRecordWithFlags() can be called multiple times on the same event and + will overwrite the previously captured state.Other APIs such as + ::cuStreamWaitEvent() use the most recently captured state at the time + of the API call, and are not affected by later calls to + ::cuEventRecordWithFlags(). Before the first call to::cuEventRecordWithFlags(), an + event represents an empty set of work, so for example::cuEventQuery() + would return ::CUDA_SUCCESS. + + + + + Waits until the event has actually been recorded. If has been called on this event, the function returns + . Waiting for an event that was created with the + flag will cause the calling CPU thread to block until the event has actually been recorded. + If has previously been called and the event has not been recorded yet, this function throws . + + + + + Returns true if the event has actually been recorded, or false if not. If + has not been called on this event, the function throws . - + - A variable located in managed memory. - Type: char3 + Computes the elapsed time between two events (in milliseconds with a resolution of around 0.5 microseconds). If + either event has not been recorded yet, this function throws . If either event has been + recorded with a non-zero stream, the result is undefined. + + + - + - Creates a new CudaManagedMemory and allocates the memory on host/device. + Represents a Cuda graph. On disose() all graph nodes will be distroyed, too! - In elements - - + - Creates a new CudaManagedMemory from definition in cu-file. + Creates a new CudaGraph - The module where the variable is defined in. - The variable name as defined in the cu-file. - + - Creates a new CudaManagedMemory from definition in cu-file. + For clone graph method - The kernel which module defines the variable. - The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - - - UIntPtr to managed memory. - - - + - CUdeviceptr to managed memory. + Creates an empty node and adds it to a graph + Creates a new node which performs no operation, and adds it to to the graph with + dependencies specified via dependencies. + It is possible for dependencies to be null, in which case the node will be placed + at the root of the graph. Dependencies may not have any duplicate entries. + + An empty node performs no operation during execution, but can be used for + transitive ordering. For example, a phased execution graph with 2 groups of n + nodes with a barrier between them can be represented using an empty node and + 2*n dependency edges, rather than no empty node and n^2 dependency edges. + can be null + A handle to the new node will be returned. 
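The event members summarized above (Record, Synchronize, Query and the static ElapsedTime helper) combine into the usual GPU timing pattern. The sketch below is illustrative only: the PTX module name, kernel entry point and launch configuration are placeholders, and the exact overloads are assumed to match the summaries in this file rather than verified against a specific ManagedCuda release.

using System;
using ManagedCuda;

class EventTimingSketch
{
    static void Main()
    {
        const int n = 1 << 20;
        using (var ctx = new CudaContext(0))
        using (var stream = new CudaStream())
        using (var start = new CudaEvent())
        using (var stop = new CudaEvent())
        using (var data = new CudaDeviceVariable<float>(n))
        {
            // Placeholder kernel: scale(float* data, float factor, int n).
            CudaKernel kernel = ctx.LoadKernel("kernels.ptx", "scale");
            kernel.GridDimensions = (n + 255) / 256;
            kernel.BlockDimensions = 256;

            start.Record(stream.Stream);   // recorded once prior work in the stream has completed
            kernel.RunAsync(stream.Stream, data.DevicePointer, 2.0f, n);
            stop.Record(stream.Stream);

            stop.Synchronize();            // blocks until 'stop' has actually been recorded
            Console.WriteLine("Recorded: " + stop.Query());                          // true after recording
            Console.WriteLine("Elapsed: " + CudaEvent.ElapsedTime(start, stop) + " ms");
        }
    }
}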
- + - Size in bytes + Creates a memset node and adds it to a graph + Creates a new memset node and adds it to graph with + dependencies specified via dependencies. + It is possible for dependencies to be null, in which case the node will be placed + at the root of the graph. Dependencies may not have any duplicate entries. + The element size must be 1, 2, or 4 bytes. + When the graph is launched, the node will perform the memset described by memsetParams. + can be null + When the graph is launched, the node will perform the memset described by memsetParams. + Cuda context used for the operation + A handle to the new node will be returned. - + - Size in elements + Creates a memset node and adds it to a graph + Creates a new memset node and adds it to graph with + dependencies specified via dependencies. + It is possible for dependencies to be null, in which case the node will be placed + at the root of the graph. Dependencies may not have any duplicate entries. + The element size must be 1, 2, or 4 bytes. + When the graph is launched, the node will perform the memset described by memsetParams. + can be null + When the graph is launched, the node will perform the memset on deviceVariable. + Value to set + Cuda context used for the operation + A handle to the new node will be returned. - + - Access array per element. + Creates a memset node and adds it to a graph + Creates a new memset node and adds it to graph with + dependencies specified via dependencies. + It is possible for dependencies to be null, in which case the node will be placed + at the root of the graph. Dependencies may not have any duplicate entries. + The element size must be 1, 2, or 4 bytes. + When the graph is launched, the node will perform the memset described by memsetParams. - index in elements - + can be null + When the graph is launched, the node will perform the memset on deviceVariable. + Value to set + Cuda context used for the operation + A handle to the new node will be returned. - + - If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + Creates a memcpy node and adds it to a graph + Creates a new memcpy node and adds it to graph with + dependencies specified via dependencies. + It is possible for dependencies to be null, in which case the node will be placed + at the root of the graph. Dependencies may not have any duplicate entries. + A handle to the new node will be returned. + When the graph is launched, the node will perform the memcpy described by copyParams. + See ::cuMemcpy3D() for a description of the structure and its restrictions. + Memcpy nodes have some additional restrictions with regards to managed memory, if the + system contains at least one device which has a zero value for the device attribute + ::CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS. If one or more of the operands refer + to managed memory, then using the memory type ::CU_MEMORYTYPE_UNIFIED is disallowed + for those operand(s). The managed memory will be treated as residing on either the + host or the device, depending on which memory type is specified. + can be null + Parameters for the memory copy + Cuda context used for the operation + A handle to the new node will be returned. - + - Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. 
+ Creates a kernel execution node and adds it to a graph + Creates a new kernel execution node and adds it to the graph with + dependencies specified via dependencies and arguments specified in nodeParams. + It is possible for dependencies to be null, in which case the node will be placed + at the root of the graph. Dependencies may not have any duplicate entries. + A handle to the new node will be returned. - managed variable - newly allocated host variable with value from managed memory + can be null + Parameters for the GPU execution node + A handle to the new node will be returned. - + - The on which a pointer was allocated or registered + Creates a kernel execution node and adds it to a graph + Creates a new kernel execution node and adds it to the graph with + dependencies specified via dependencies and arguments specified in nodeParams. + It is possible for dependencies to be null, in which case the node will be placed + at the root of the graph. Dependencies may not have any duplicate entries. + A handle to the new node will be returned. + can be null + Kernel to execute + Kernel parameters to pass. An Array of IntPtr each of them pointing to a parameters. Note that the parameters must be pinned by GC! + Extra data + A handle to the new node will be returned. - + - The describing the physical location of a pointer + Creates a child graph node and adds it to a graph + Creates a new node which executes an embedded graph, and adds it to this Graph with + dependencies specified via dependencies. + It is possible for dependencies to be null, in which case the node will be placed + at the root of the graph. Dependencies may not have any duplicate entries. + A handle to the new node will be returned. + The node executes an embedded child graph. The child graph is cloned in this call. + can be null + + A handle to the new node will be returned. - + - The address at which a pointer's memory may be accessed on the device - Except in the exceptional disjoint addressing cases, the value returned will equal the input value. + Creates a host execution node and adds it to a graph + Creates a new CPU execution node and adds it to the graph with + dependencies specified via dependencies. + It is possible for dependencies to be null, in which case the node will be placed + at the root of the graph. Dependencies may not have any duplicate entries. + A handle to the new node will be returned. + When the graph is launched, the node will invoke the specified CPU function. + can be null + Host function to execute + User data for host function. Note that the data object must be pinned by GC! + A handle to the new node will be returned. - + - The address at which a pointer's memory may be accessed on the host + Creates an event record node and adds it to a graph + Creates a new event record node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p params. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + Each launch of the graph will record \p event to capture execution of the + node's dependencies. 
+ Dependencies of the node + Event for the node + Returns newly created node - + - A pair of tokens for use with the nv-p2p.h Linux kernel interface + Creates an event wait node and adds it to a graph + Creates a new event wait node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p params. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + The graph node will wait for all work captured in \p event. See ::cuEventRecord() + for details on what is captured by an event. \p event may be from a different context + or device than the launch stream. + Dependencies of the node + Event for the node + Returns newly created node - + - Synchronize every synchronous memory operation initiated on this region + Creates an external semaphore signal node and adds it to a graph + Creates a new external semaphore signal node and adds it to \p hGraph with \p + numDependencies dependencies specified via \p dependencies and arguments specified + in \p nodeParams.It is possible for \p numDependencies to be 0, in which case the + node will be placed at the root of the graph. \p dependencies may not have any + duplicate entries. A handle to the new node will be returned in \p phGraphNode. + Dependencies of the node + Parameters for the node + Returns newly created node - + - A process-wide unique ID for an allocated memory region + Creates an external semaphore wait node and adds it to a graph + Creates a new external semaphore wait node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. A handle + to the new node will be returned in \p phGraphNode. + Dependencies of the node + Parameters for the node + Returns newly created node - + - Indicates if the pointer points to managed memory + Creates an allocation node and adds it to a graph + Creates a new allocation node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. A handle + to the new node will be returned in \p phGraphNode. + Dependencies of the node + Parameters for the node + Returns newly created node - + - Attach memory to a stream asynchronously - - Enqueues an operation in hStream to specify stream association of - length bytes of memory starting from dptr. This function is a - stream-ordered operation, meaning that it is dependent on, and will - only take effect when, previous work in stream has completed. Any - previous association is automatically replaced. - - dptr must point to an address within managed memory space declared - using the __managed__ keyword or allocated with cuMemAllocManaged. - - length must be zero, to indicate that the entire allocation's - stream association is being changed. Currently, it's not possible - to change stream association for a portion of an allocation. - - The stream association is specified using flags which must be - one of . 
- If the flag is specified, the memory can be accessed - by any stream on any device. - If the flag is specified, the program makes a guarantee - that it won't access the memory on the device from any stream. - If the flag is specified, the program makes a guarantee - that it will only access the memory on the device from hStream. It is illegal - to attach singly to the NULL stream, because the NULL stream is a virtual global - stream and not a specific stream. An error will be returned in this case. - - When memory is associated with a single stream, the Unified Memory system will - allow CPU access to this memory region so long as all operations in hStream - have completed, regardless of whether other streams are active. In effect, - this constrains exclusive ownership of the managed memory region by - an active GPU to per-stream activity instead of whole-GPU activity. - - Accessing memory on the device from streams that are not associated with - it will produce undefined results. No error checking is performed by the - Unified Memory system to ensure that kernels launched into other streams - do not access this region. - - It is a program's responsibility to order calls to - via events, synchronization or other means to ensure legal access to memory - at all times. Data visibility and coherency will be changed appropriately - for all kernels which follow a stream-association change. - - If hStream is destroyed while data is associated with it, the association is - removed and the association reverts to the default visibility of the allocation - as specified at cuMemAllocManaged. For __managed__ variables, the default - association is always . Note that destroying a stream is an - asynchronous operation, and as a result, the change to default association won't - happen until all work in the stream has completed. - + Creates a memory free node and adds it to a graph + Creates a new memory free node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. A handle + to the new node will be returned in \p phGraphNode. - Stream in which to enqueue the attach operation - Length of memory (must be zero) - Must be one of - + Dependencies of the node + Parameters for the node + Returns newly created node - + - Prefetches memory to the specified destination device - Prefetches memory to the specified destination device. devPtr is the - base device pointer of the memory to be prefetched and dstDevice is the - destination device. count specifies the number of bytes to copy. hStream - is the stream in which the operation is enqueued. - - Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. - - If no physical memory has been allocated for this region, then this memory region - will be populated and mapped on the destination device. If there's insufficient - memory to prefetch the desired region, the Unified Memory driver may evict pages - belonging to other memory regions to make room. If there's no memory that can be - evicted, then the Unified Memory driver will prefetch less than what was requested. - - In the normal case, any mappings to the previous location of the migrated pages are - removed and mappings for the new location are only setup on the dstDevice. 
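Taken together, the node-creation summaries above describe how the dependency DAG is assembled: each Add*Node call takes an array of dependency handles (or null for a root node) and returns a CUgraphNode. Below is a minimal, hedged sketch of the phased-execution example given for empty nodes (two groups of n nodes separated by one barrier node, costing 2*n dependency edges instead of n^2); the AddEmptyNode(CUgraphNode[]) wrapper signature is assumed from the summaries, and real workloads would use kernel, memcpy or memset nodes in place of the empty ones.

using ManagedCuda;
using ManagedCuda.BasicTypes;

class GraphTopologySketch
{
    static void BuildPhasedGraph(CudaGraph graph, int n)
    {
        var phase1 = new CUgraphNode[n];
        for (int i = 0; i < n; i++)
            phase1[i] = graph.AddEmptyNode(null);      // roots: no dependencies

        // One barrier node depending on every phase-1 node (n edges).
        CUgraphNode barrier = graph.AddEmptyNode(phase1);

        // Phase 2: each node depends only on the barrier (n more edges).
        for (int i = 0; i < n; i++)
            graph.AddEmptyNode(new[] { barrier });
    }

    static void Main()
    {
        using (var ctx = new CudaContext(0))   // a current context is assumed to be required
        using (var graph = new CudaGraph())
        {
            BuildPhasedGraph(graph, 4);
        }
    }
}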
- The application can exercise finer control on these mappings using ::cudaMemAdvise. + Clones a graph + This function creates a copy of the original Graph. + All parameters are copied into the cloned graph. The original graph may be modified + after this call without affecting the clone. + Child graph nodes in the original graph are recursively copied into the clone. - Destination device to prefetch to - Stream to enqueue prefetch operation - Note that this function is asynchronous with respect to the host and all work on other devices. - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. 
- This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. - - Note that this function is asynchronous with respect to the host and all work - on other devices. + Finds a cloned version of a node + This function returns the node corresponding to originalNode + in the original graph. + This cloned graph must have been cloned from the original Graph via its Clone() method. + OriginalNode must have been in that graph at the time of the call to + Clone(), and the corresponding cloned node in this graph must not have + been removed. The cloned node is then returned. - Pointer to memory to set the advice for - Size in bytes of the memory range - Advice to be applied for the specified memory range - Device to apply the advice for + - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. 
If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. - - Note that this function is asynchronous with respect to the host and all work - on other devices. + Returns a graph's nodes - managed memory variable - Advice to be applied for the specified memory range - Device to apply the advice for + - + - Enumerator class for CudaManagedMemory_char3 + Returns a graph's root nodes + - + - + Returns a graph's dependency edges - + + - + - + Adds dependency edges to a graph + Elements in from and to at corresponding indices define a dependency. + Each node in from and to must belong to this Graph. + Specifying an existing dependency will return an error. 
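Dependency edges can also be added between existing nodes after the fact, as described just above: from[i] -> to[i] defines one edge, and specifying an edge that already exists is an error. A small sketch follows, with the AddDependencies method name and signature assumed from the summary text.

using ManagedCuda;
using ManagedCuda.BasicTypes;

class GraphEdgesSketch
{
    static void Main()
    {
        using (var ctx = new CudaContext(0))
        using (var graph = new CudaGraph())
        {
            CUgraphNode producer = graph.AddEmptyNode(null);
            CUgraphNode consumer = graph.AddEmptyNode(null);

            // from[i] -> to[i]; re-adding an existing edge returns an error.
            graph.AddDependencies(new CUgraphNode[] { producer },
                                  new CUgraphNode[] { consumer });
        }
    }
}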
+ + - + - + Removes dependency edges to a graph + Elements in from and to at corresponding indices define a dependency. + Each node in from and to must belong to this Graph. + Specifying an existing dependency will return an error. + + - + - + Creates an executable graph from a graph + Instantiates this Graph as an executable graph. The graph is validated for any + structural constraints or intra-node constraints which were not previously + validated. If instantiation is successful, a handle to the instantiated graph + is returned. - + - + Creates an executable graph from a graph + Instantiates \p hGraph as an executable graph. The graph is validated for any + structural constraints or intra-node constraints which were not previously + validated.If instantiation is successful, a handle to the instantiated graph + is returned in \p phGraphExec. - - + - A variable located in managed memory. - Type: char4 + Write a DOT file describing graph structure + Using the provided \p hGraph, write to \p path a DOT formatted description of the graph. + By default this includes the graph topology, node types, node id, kernel names and memcpy direction. + \p flags can be specified to write more detailed information about each node type such as + parameter values, kernel attributes, node and function handles. + The path to write the DOT file to + Flags from CUgraphDebugDot_flags for specifying which additional node information to write - + - Creates a new CudaManagedMemory and allocates the memory on host/device. + Returns the inner graph handle - In elements - - + - Creates a new CudaManagedMemory from definition in cu-file. + Represents an executable Cuda graph. - The module where the variable is defined in. - The variable name as defined in the cu-file. - + - Creates a new CudaManagedMemory from definition in cu-file. + For clone graph method - The kernel which module defines the variable. - The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + - UIntPtr to managed memory. + Launches an executable graph in a stream. + Only one instance of GraphExec may be executing + at a time. Each launch is ordered behind both any previous work in Stream + and any previous launches of GraphExec.To execute a graph concurrently, it must be + instantiated multiple times into multiple executable graphs. + - + - CUdeviceptr to managed memory. + Uploads an executable graph in a stream + Uploads \p hGraphExec to the device in \p hStream without executing it.Uploads of + the same \p hGraphExec will be serialized.Each upload is ordered behind both any + previous work in \p hStream and any previous launches of \p hGraphExec. + Stream in which to upload the graph + - + - Size in bytes + Sets the parameters for a kernel node in the given graphExec + Sets the parameters of a kernel node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + \p hNode must not have been removed from the original graph.The \p func field + of \p nodeParams cannot be modified and must match the original value. + All other values can be modified. + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + - - - Size in elements + + + Sets the parameters for a memcpy node in the given graphExec. 
+ Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had + contained \p copyParams at instantiation. hNode must remain in the graph which was + used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored. + The source and destination memory in \p copyParams must be allocated from the same + contexts as the original source and destination memory. Both the instantiation-time + memory operands and the memory operands in \p copyParams must be 1-dimensional. + Zero-length operations are not supported. + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. hNode is also + not modified by this call. + Returns CUDA_ERROR_INVALID_VALUE if the memory operands' mappings changed or + either the original or new memory operands are multidimensional. + + + + + Sets the parameters for a memset node in the given graphExec. + Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had + contained \p memsetParams at instantiation. hNode must remain in the graph which was + used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored. + The destination memory in \p memsetParams must be allocated from the same + contexts as the original destination memory. Both the instantiation-time + memory operand and the memory operand in \p memsetParams must be 1-dimensional. + Zero-length operations are not supported. + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. hNode is also + not modified by this call. + Returns CUDA_ERROR_INVALID_VALUE if the memory operand's mappings changed or + either the original or new memory operand are multidimensional. + + + + + Sets the parameters for a host node in the given graphExec. + Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had + contained \p nodeParams at instantiation. hNode must remain in the graph which was + used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored. + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. hNode is also + not modified by this call. + + + + + Updates node parameters in the child graph node in the given graphExec. + Updates the work represented by \p hNode in \p hGraphExec as though the nodes contained + in \p hNode's graph had the parameters contained in \p childGraph's nodes at instantiation. + \p hNode must remain in the graph which was used to instantiate \p hGraphExec. + Changed edges to and from \p hNode are ignored. + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. \p hNode is also + not modified by this call. + The topology of \p childGraph, as well as the node insertion order, must match that + of the graph contained in \p hNode. See::cuGraphExecUpdate() for a list of restrictions + on what can be updated in an instantiated graph.The update is recursive, so child graph + nodes contained within the top level child graph will also be updated. + + + + + Sets the parameters for an external semaphore signal node in the given graphExec + Sets the parameters of an external semaphore signal node in an executable graph \p hGraphExec. 
+ The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + hNode must not have been removed from the original graph. + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + hNode is also not modified by this call. + Changing \p nodeParams->numExtSems is not supported. + + + + + Sets the parameters for an external semaphore wait node in the given graphExec + Sets the parameters of an external semaphore wait node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + hNode must not have been removed from the original graph. + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + hNode is also not modified by this call. + Changing \p nodeParams->numExtSems is not supported. + + + + + Sets the event for an event record node in the given graphExec + Sets the event of an event record node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + + + + Sets the event for an event record node in the given graphExec + Sets the event of an event record node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. - + - Access array per element. + Returns the inner executable graph handle - index in elements - - + - If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + A list of JIT compiler / linker option passed to Cuda. + If buffer options are used (i.e. InfoLogBuffer and ErrorLogBuffer), this + collection should only be used once as buffer size is overwritten by Cuda. + To copy data from unmanaged to managed memory, call after + the API call that produced output data. + Maximum number of options is limited to 30. - + + + + - Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. + Add a single option to the collection. - managed variable - newly allocated host variable with value from managed memory + Option to add - + - The on which a pointer was allocated or registered + A multiple options to the collection. + Options to add - + - The describing the physical location of a pointer + Copy data from unmanaged to managed memory - + - The address at which a pointer's memory may be accessed on the device - Except in the exceptional disjoint addressing cases, the value returned will equal the input value. + Reset values returned from Cuda API for info and error buffers. 
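The instantiate/launch split documented above is the core usage pattern for executable graphs: validate and instantiate once, then launch the resulting executable graph repeatedly, optionally patching node parameters between launches. A hedged sketch (the Instantiate and Launch member names are taken from the summaries, the stream parameter form is assumed, and the single empty node stands in for real kernel/memcpy/memset work):

using ManagedCuda;

class GraphLaunchSketch
{
    static void Main()
    {
        using (var ctx = new CudaContext(0))
        using (var graph = new CudaGraph())
        using (var stream = new CudaStream())
        {
            graph.AddEmptyNode(null);                  // stand-in for real nodes

            // Validate and instantiate once...
            using (CudaGraphExec exec = graph.Instantiate())
            {
                // ...then launch as often as needed; each launch is ordered behind
                // earlier work in 'stream' and earlier launches of 'exec'.
                for (int i = 0; i < 10; i++)
                    exec.Launch(stream);               // stream parameter form assumed

                stream.Synchronize();
            }
        }
    }
}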
- + - The address at which a pointer's memory may be accessed on the host + For dispose - + - A pair of tokens for use with the nv-p2p.h Linux kernel interface + Dispose - + - Synchronize every synchronous memory operation initiated on this region + For IDisposable + - + - A process-wide unique ID for an allocated memory region + Online compiler options - + - Indicates if the pointer points to managed memory + Option value converted to (void *) - + - Attach memory to a stream asynchronously - - Enqueues an operation in hStream to specify stream association of - length bytes of memory starting from dptr. This function is a - stream-ordered operation, meaning that it is dependent on, and will - only take effect when, previous work in stream has completed. Any - previous association is automatically replaced. - - dptr must point to an address within managed memory space declared - using the __managed__ keyword or allocated with cuMemAllocManaged. - - length must be zero, to indicate that the entire allocation's - stream association is being changed. Currently, it's not possible - to change stream association for a portion of an allocation. - - The stream association is specified using flags which must be - one of . - If the flag is specified, the memory can be accessed - by any stream on any device. - If the flag is specified, the program makes a guarantee - that it won't access the memory on the device from any stream. - If the flag is specified, the program makes a guarantee - that it will only access the memory on the device from hStream. It is illegal - to attach singly to the NULL stream, because the NULL stream is a virtual global - stream and not a specific stream. An error will be returned in this case. - - When memory is associated with a single stream, the Unified Memory system will - allow CPU access to this memory region so long as all operations in hStream - have completed, regardless of whether other streams are active. In effect, - this constrains exclusive ownership of the managed memory region by - an active GPU to per-stream activity instead of whole-GPU activity. - - Accessing memory on the device from streams that are not associated with - it will produce undefined results. No error checking is performed by the - Unified Memory system to ensure that kernels launched into other streams - do not access this region. - - It is a program's responsibility to order calls to - via events, synchronization or other means to ensure legal access to memory - at all times. Data visibility and coherency will be changed appropriately - for all kernels which follow a stream-association change. - - If hStream is destroyed while data is associated with it, the association is - removed and the association reverts to the default visibility of the allocation - as specified at cuMemAllocManaged. For __managed__ variables, the default - association is always . Note that destroying a stream is an - asynchronous operation, and as a result, the change to default association won't - happen until all work in the stream has completed. - + Option - Stream in which to enqueue the attach operation - Length of memory (must be zero) - Must be one of - - + + + + - Prefetches memory to the specified destination device - Prefetches memory to the specified destination device. devPtr is the - base device pointer of the memory to be prefetched and dstDevice is the - destination device. count specifies the number of bytes to copy. hStream - is the stream in which the operation is enqueued. 
- - Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. - - If no physical memory has been allocated for this region, then this memory region - will be populated and mapped on the destination device. If there's insufficient - memory to prefetch the desired region, the Unified Memory driver may evict pages - belonging to other memory regions to make room. If there's no memory that can be - evicted, then the Unified Memory driver will prefetch less than what was requested. - - In the normal case, any mappings to the previous location of the migrated pages are - removed and mappings for the new location are only setup on the dstDevice. - The application can exercise finer control on these mappings using ::cudaMemAdvise. + For dispose - Destination device to prefetch to - Stream to enqueue prefetch operation - Note that this function is asynchronous with respect to the host and all work on other devices. - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. 
- If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. - - Note that this function is asynchronous with respect to the host and all work - on other devices. + Dispose - Pointer to memory to set the advice for - Size in bytes of the memory range - Advice to be applied for the specified memory range - Device to apply the advice for - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. 
If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. - - Note that this function is asynchronous with respect to the host and all work - on other devices. + For IDisposable - managed memory variable - Advice to be applied for the specified memory range - Device to apply the advice for + - + - Enumerator class for CudaManagedMemory_char4 + Max number of registers that a thread may use. + Option type: unsigned int + Applies to: compiler only - + - + Max number of registers that a thread may use. 
+ Option type: unsigned int + Applies to: compiler only - + - + - + IN: Specifies minimum number of threads per block to target compilation + for + OUT: Returns the number of threads the compiler actually targeted. + This restricts the resource utilization fo the compiler (e.g. max + registers) such that a block with the given number of threads should be + able to launch based on register limitations. Note, this option does not + currently take into account any other resource limitations, such as + shared memory utilization. + Option type: unsigned int + Applies to: compiler only - + - + IN: Specifies minimum number of threads per block to target compilation + for + OUT: Returns the number of threads the compiler actually targeted. + This restricts the resource utilization fo the compiler (e.g. max + registers) such that a block with the given number of threads should be + able to launch based on register limitations. Note, this option does not + currently take into account any other resource limitations, such as + shared memory utilization. + Option type: unsigned int + Applies to: compiler only + - + - + Returns the number of threads the compiler actually targeted. + This restricts the resource utilization fo the compiler (e.g. max + registers) such that a block with the given number of threads should be + able to launch based on register limitations. Note, this option does not + currently take into account any other resource limitations, such as + shared memory utilization. + The value is only valid after a succesful call to - + - + Returns a float value in the option of the wall clock time, in + milliseconds, spent creating the cubin + Option type: float + Applies to: compiler and linker - - + - A variable located in managed memory. - Type: short + Returns a float value in the option of the wall clock time, in + milliseconds, spent creating the cubin + Option type: float + Applies to: compiler and linker - + - Creates a new CudaManagedMemory and allocates the memory on host/device. + Returns a float value in the option of the wall clock time, in + milliseconds, spent creating the cubin + Option type: float + Applies to: compiler and linker + The value is only valid after a succesful call to - In elements - - + - Creates a new CudaManagedMemory from definition in cu-file. + Pointer to a buffer in which to print any log messsages from PTXAS + that are informational in nature (the buffer size is specified via + option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES) + Option type: char* + Applies to: compiler and linker + You must free the internal buffer array manually after use by calling ! - The module where the variable is defined in. - The variable name as defined in the cu-file. - + - Creates a new CudaManagedMemory from definition in cu-file. + Pointer to a buffer in which to print any log messsages from PTXAS + that are informational in nature + Option type: char* + Applies to: compiler and linker + You must free the internal buffer array manually after use by calling ! - The kernel which module defines the variable. - The variable name as defined in the cu-file. + Size of the internal buffer array - + - For dispose + ManagedCuda allocates an byte array as buffer and pins it in order to pass it to Cuda. + You must free the buffer manually if the buffer is not needed anymore. - + - Dispose + Returns the buffer converted to string. + The value is only valid after a succesful call to - + - For IDisposable + - + - UIntPtr to managed memory. 
+ Pointer to a buffer in which to print any log messages from PTXAS that + reflect errors + Option type: char* + Applies to: compiler and linker + You must free the internal buffer array manually after use by calling ! - + - CUdeviceptr to managed memory. + Pointer to a buffer in which to print any log messages from PTXAS that + reflect errors + Option type: char* + Applies to: compiler and linker + You must free the internal buffer array manually after use by calling ! + - + - Size in bytes + ManagedCuda allocates an byte array as buffer and pins it in order to pass it to Cuda. + You must free the buffer manually if the buffer is not needed anymore. - + - Size in elements + Returns the buffer converted to string. + The value is only valid after a succesful call to - + - Access array per element. + - index in elements - + - + - If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + Level of optimizations to apply to generated code (0 - 4), with 4 + being the default and highest level of optimizations. + Option type: unsigned int + Applies to: compiler only - + - Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. + Level of optimizations to apply to generated code (0 - 4), with 4 + being the default and highest level of optimizations. + Option type: unsigned int + Applies to: compiler only - managed variable - newly allocated host variable with value from managed memory + Level of optimizations to apply to generated code (0 - 4), with 4 + being the default and highest level of optimizations. - + - The on which a pointer was allocated or registered + No option value required. Determines the target based on the current + attached context (default) + Option type: No option value needed + Applies to: compiler and linker - + - The describing the physical location of a pointer + Determines the target based on the current attached context (default) + Option type: No option value needed + Applies to: compiler and linker - + - The address at which a pointer's memory may be accessed on the device - Except in the exceptional disjoint addressing cases, the value returned will equal the input value. + Target is chosen based on supplied . + Option type: unsigned int for enumerated type + Applies to: compiler and linker - + - The address at which a pointer's memory may be accessed on the host + Target is chosen based on supplied ::CUjit_target_enum. + Option type: unsigned int for enumerated type ::CUjit_target_enum + Applies to: compiler and linker + - + - A pair of tokens for use with the nv-p2p.h Linux kernel interface + Specifies choice of fallback strategy if matching cubin is not found. + Choice is based on supplied . + Option type: unsigned int for enumerated type + Applies to: compiler only - + - Synchronize every synchronous memory operation initiated on this region + Specifies choice of fallback strategy if matching cubin is not found. + Choice is based on supplied . 
+ Option type: unsigned int for enumerated type + Applies to: compiler only + - + - A process-wide unique ID for an allocated memory region + Specifies whether to create debug information in output (-g) (0: false, default) + Option type: int + Applies to: compiler and linker - + - Indicates if the pointer points to managed memory + Specifies whether to create debug information in output (-g) (0: false, default) + Option type: int + Applies to: compiler and linker + - + - Attach memory to a stream asynchronously - - Enqueues an operation in hStream to specify stream association of - length bytes of memory starting from dptr. This function is a - stream-ordered operation, meaning that it is dependent on, and will - only take effect when, previous work in stream has completed. Any - previous association is automatically replaced. - - dptr must point to an address within managed memory space declared - using the __managed__ keyword or allocated with cuMemAllocManaged. - - length must be zero, to indicate that the entire allocation's - stream association is being changed. Currently, it's not possible - to change stream association for a portion of an allocation. - + Generate verbose log messages (0: false, default) + Option type: int + Applies to: compiler and linker + + + + + Generate verbose log messages (0: false, default) + Option type: int + Applies to: compiler and linker + + + + + + Generate line number information (-lineinfo) (0: false, default) + Option type: int + Applies to: compiler only + + + + + Generate line number information (-lineinfo) (0: false, default) + Option type: int + Applies to: compiler only + + + + + + Specifies whether to enable caching explicitly (-dlcm) + Choice is based on supplied . + Option type: unsigned int for enumerated type + Applies to: compiler only + + + + + Specifies whether to enable caching explicitly (-dlcm) + Choice is based on supplied . + Option type: unsigned int for enumerated type + Applies to: compiler only + + + + + + Enable link-time optimization (-dlto) for device code (0: false, default) + Option type: int + Applies to: compiler and linker + + + + + Enable link-time optimization (-dlto) for device code (0: false, default) + Option type: int + Applies to: compiler and linker + + + + + + Control single-precision denormals (-ftz) support (0: false, default). + 1 : flushes denormal values to zero + 0 : preserves denormal values + Option type: int + Applies to: link-time optimization specified with CU_JIT_LTO + + + + + Control single-precision denormals (-ftz) support (0: false, default). + 1 : flushes denormal values to zero + 0 : preserves denormal values + Option type: int + Applies to: link-time optimization specified with CU_JIT_LTO + + + + + + Control single-precision floating-point division and reciprocals + (-prec-div) support (1: true, default). + 1 : Enables the IEEE round-to-nearest mode + 0 : Enables the fast approximation mode + Option type: int + Applies to: link-time optimization specified with CU_JIT_LTO + + + + + Control single-precision floating-point division and reciprocals + (-prec-div) support (1: true, default). + 1 : Enables the IEEE round-to-nearest mode + 0 : Enables the fast approximation mode + Option type: int + Applies to: link-time optimization specified with CU_JIT_LTO + + + + + + Control single-precision floating-point square root + (-prec-sqrt) support (1: true, default). 
+ 1 : Enables the IEEE round-to-nearest mode + 0 : Enables the fast approximation mode + Option type: int\n + Applies to: link-time optimization specified with CU_JIT_LTO + + + + + Control single-precision floating-point square root + (-prec-sqrt) support (1: true, default). + 1 : Enables the IEEE round-to-nearest mode + 0 : Enables the fast approximation mode + Option type: int\n + Applies to: link-time optimization specified with CU_JIT_LTO + + + + + + Enable/Disable the contraction of floating-point multiplies + and adds/subtracts into floating-point multiply-add (-fma) + operations (1: Enable, default; 0: Disable). + Option type: int\n + Applies to: link-time optimization specified with CU_JIT_LTO + + + + + Enable/Disable the contraction of floating-point multiplies + and adds/subtracts into floating-point multiply-add (-fma) + operations (1: Enable, default; 0: Disable). + Option type: int\n + Applies to: link-time optimization specified with CU_JIT_LTO + + + + + + A pending JIT linker invocation. + + + + + Creates a pending JIT linker invocation. + + + + + Creates a pending JIT linker invocation. + + Collection of linker and compiler options + + + + For dispose + + + + + Dispose + Destroys state for a JIT linker invocation. + + + + + For IDisposable. + Destroys state for a JIT linker invocation. + + + + + + Add an input to a pending linker invocation. + + The input data. PTX must be NULL-terminated. + The type of the input data. + An optional name for this input in log messages. + Collection of linker and compiler options + + + + Add an input to a pending linker invocation. + + The input data. PTX must be NULL-terminated. + The type of the input data. + An optional name for this input in log messages. + Collection of linker and compiler options + + + + Add an input to a pending linker invocation. + + Path to the input file. + The type of the input data. + Collection of linker and compiler options + + + + Complete a pending linker invocation. + Completes the pending linker action and returns the cubin image for the linked + device code, which can be used with ::cuModuleLoadData. + + + + + A variable located in managed memory. + Type: byte + + + + + Creates a new CudaManagedMemory and allocates the memory on host/device. + + In elements + + + + + Creates a new CudaManagedMemory from definition in cu-file. + + The module where the variable is defined in. + The variable name as defined in the cu-file. + + + + Creates a new CudaManagedMemory from definition in cu-file. + + The kernel which module defines the variable. + The variable name as defined in the cu-file. + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + UIntPtr to managed memory. + + + + + CUdeviceptr to managed memory. + + + + + Size in bytes + + + + + Size in elements + + + + + Access array per element. + + index in elements + + + + + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + + + + + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. + + managed variable + newly allocated host variable with value from managed memory + + + + The on which a pointer was allocated or registered + + + + + The describing the physical location of a pointer + + + + + The address at which a pointer's memory may be accessed on the device + Except in the exceptional disjoint addressing cases, the value returned will equal the input value. 
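The documentation comments above describe the CU_JIT_* options (log buffers, optimization level, target/fallback, LTO and floating-point controls) together with the "pending JIT linker invocation" members: create the invocation with an option collection, add PTX/cubin inputs from data or a file, and complete it to obtain a cubin image. Below is a minimal, hedged C# sketch of how those pieces might be wired together. Every class and member name used here (CudaContext, CudaLinker, CudaJitOptionCollection, CudaJOInfoLogBuffer, CudaJOOptimizationLevel, AddFile, Complete, Value, FreeHandle, CUJITInputType) is inferred from these comments and is an assumption, not confirmed API.

```csharp
// Hypothetical sketch only: names are inferred from the XML comments above and may
// not match the real ManagedCuda API exactly.
using System;
using ManagedCuda;
using ManagedCuda.BasicTypes;

class JitLinkSketch
{
    static void Main()
    {
        using var ctx = new CudaContext(0);              // device 0 context

        // Collection of linker and compiler options (names assumed).
        var options = new CudaJitOptionCollection();
        var infoLog = new CudaJOInfoLogBuffer(4096);     // PTXAS info log buffer
        options.Add(infoLog);
        options.Add(new CudaJOOptimizationLevel(4));     // 0-4, 4 = default/highest

        // "Creates a pending JIT linker invocation" with the given options.
        using var linker = new CudaLinker(options);

        // "Add an input to a pending linker invocation": path, input type, options.
        linker.AddFile("kernel.ptx", CUJITInputType.PTX, null);

        // "Complete a pending linker invocation": returns the linked cubin image,
        // which the comments say can be used with cuModuleLoadData / module loading.
        byte[] cubin = linker.Complete();
        Console.WriteLine($"cubin size: {cubin.Length} bytes");

        // Log buffer is only valid after a successful call; member names assumed.
        Console.WriteLine(infoLog.Value);
        infoLog.FreeHandle();                            // must be freed manually
    }
}
```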
+ + + + + The address at which a pointer's memory may be accessed on the host + + + + + A pair of tokens for use with the nv-p2p.h Linux kernel interface + + + + + Synchronize every synchronous memory operation initiated on this region + + + + + A process-wide unique ID for an allocated memory region + + + + + Indicates if the pointer points to managed memory + + + + + Attach memory to a stream asynchronously + + Enqueues an operation in hStream to specify stream association of + length bytes of memory starting from dptr. This function is a + stream-ordered operation, meaning that it is dependent on, and will + only take effect when, previous work in stream has completed. Any + previous association is automatically replaced. + + dptr must point to an address within managed memory space declared + using the __managed__ keyword or allocated with cuMemAllocManaged. + + length must be zero, to indicate that the entire allocation's + stream association is being changed. Currently, it's not possible + to change stream association for a portion of an allocation. + The stream association is specified using flags which must be one of . If the flag is specified, the memory can be accessed @@ -13448,7 +12229,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -13472,7 +12253,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -13535,7 +12316,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -13597,162 +12378,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_short + Enumerator class for CudaManagedMemory_byte - + - + - + - + - + - + A variable located in managed memory. - Type: short1 + Type: uchar1 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. 
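The CudaManagedMemory_* blocks documented above (and repeated below for each element type) expose unified memory through an element-count constructor, a per-element host indexer, asynchronous stream attachment, prefetching to a destination device, and usage advice for the Unified Memory subsystem. The following C# sketch shows one plausible way to use those members; the type and member names (CudaManagedMemory_int, StreamAttachAsync, PrefetchAsync, MemAdvise, CUmemAttach_flags, CUmemAdvise, CudaContext.Device) are taken or inferred from these comments, and their exact signatures are assumptions rather than a definitive reference.

```csharp
// Hypothetical usage sketch of the managed-memory wrappers documented above;
// member names and signatures are assumptions based on the XML comments.
using System;
using ManagedCuda;
using ManagedCuda.BasicTypes;

class ManagedMemorySketch
{
    static void Main()
    {
        using var ctx = new CudaContext(0);
        using var stream = new CudaStream();

        // Constructor size is "in elements", not bytes.
        using var data = new CudaManagedMemory_int(1024, CUmemAttach_flags.Global);

        // Managed memory is directly addressable from the host via the indexer.
        for (int i = 0; i < 1024; i++)
            data[i] = i;

        // Advise the Unified Memory subsystem, then prefetch to the device
        // before launching work (enum/property names assumed).
        data.MemAdvise(CUmemAdvise.SetReadMostly, ctx.Device);
        data.PrefetchAsync(ctx.Device, stream);

        // Stream association applies to the entire allocation, so length must be 0.
        data.StreamAttachAsync(stream, 0, CUmemAttach_flags.Single);

        stream.Synchronize();
        Console.WriteLine(data[42]);   // read back on the host after synchronization
    }
}
```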
- + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -13809,7 +12590,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -13833,7 +12614,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -13896,7 +12677,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -13958,162 +12739,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_short1 + Enumerator class for CudaManagedMemory_uchar1 - + - + - + - + - + - + A variable located in managed memory. - Type: short2 + Type: uchar2 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -14170,7 +12951,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -14194,7 +12975,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. 
@@ -14257,7 +13038,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -14319,162 +13100,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_short2 + Enumerator class for CudaManagedMemory_uchar2 - + - + - + - + - + - + A variable located in managed memory. - Type: short3 + Type: uchar3 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -14531,7 +13312,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -14555,7 +13336,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -14618,7 +13399,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -14680,162 +13461,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_short3 + Enumerator class for CudaManagedMemory_uchar3 - + - + - + - + - + - + A variable located in managed memory. - Type: short4 + Type: uchar4 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. 
The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -14892,7 +13673,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -14916,7 +13697,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -14979,7 +13760,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -15041,162 +13822,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_short4 + Enumerator class for CudaManagedMemory_uchar4 - + - + - + - + - + - + A variable located in managed memory. - Type: ushort + Type: sbyte - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. 
- + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -15253,7 +14034,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -15277,7 +14058,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -15340,7 +14121,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -15402,162 +14183,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_ushort + Enumerator class for CudaManagedMemory_sbyte - + - + - + - + - + - + A variable located in managed memory. - Type: ushort1 + Type: char1 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -15614,7 +14395,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -15638,7 +14419,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. 
@@ -15701,7 +14482,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -15763,162 +14544,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_ushort1 + Enumerator class for CudaManagedMemory_char1 - + - + - + - + - + - + A variable located in managed memory. - Type: ushort2 + Type: char2 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -15975,7 +14756,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -15999,7 +14780,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -16062,7 +14843,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -16124,162 +14905,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_ushort2 + Enumerator class for CudaManagedMemory_char2 - + - + - + - + - + - + A variable located in managed memory. - Type: ushort3 + Type: char3 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. 
The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -16336,7 +15117,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -16360,7 +15141,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -16423,7 +15204,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -16485,162 +15266,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_ushort3 + Enumerator class for CudaManagedMemory_char3 - + - + - + - + - + - + A variable located in managed memory. - Type: ushort4 + Type: char4 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. 
- + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -16697,7 +15478,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -16721,7 +15502,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -16784,7 +15565,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -16846,162 +15627,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_ushort4 + Enumerator class for CudaManagedMemory_char4 - + - + - + - + - + - + A variable located in managed memory. - Type: int + Type: short - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -17058,7 +15839,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -17082,7 +15863,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. 
@@ -17145,7 +15926,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -17207,162 +15988,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_int + Enumerator class for CudaManagedMemory_short - + - + - + - + - + - + A variable located in managed memory. - Type: int1 + Type: short1 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -17419,7 +16200,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -17443,7 +16224,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -17506,7 +16287,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -17568,162 +16349,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_int1 + Enumerator class for CudaManagedMemory_short1 - + - + - + - + - + - + A variable located in managed memory. - Type: int2 + Type: short2 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. 
The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -17780,7 +16561,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -17804,7 +16585,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -17867,7 +16648,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -17929,162 +16710,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_int2 + Enumerator class for CudaManagedMemory_short2 - + - + - + - + - + - + A variable located in managed memory. - Type: int3 + Type: short3 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. 
- + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -18141,7 +16922,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -18165,7 +16946,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -18228,7 +17009,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -18290,162 +17071,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_int3 + Enumerator class for CudaManagedMemory_short3 - + - + - + - + - + - + A variable located in managed memory. - Type: int4 + Type: short4 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -18502,7 +17283,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -18526,7 +17307,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. 
@@ -18589,7 +17370,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -18651,162 +17432,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_int4 + Enumerator class for CudaManagedMemory_short4 - + - + - + - + - + - + A variable located in managed memory. - Type: uint + Type: ushort - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -18863,7 +17644,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -18887,7 +17668,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -18950,7 +17731,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -19012,162 +17793,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_uint + Enumerator class for CudaManagedMemory_ushort - + - + - + - + - + - + A variable located in managed memory. - Type: uint1 + Type: ushort1 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. 
The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -19224,7 +18005,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -19248,7 +18029,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -19311,7 +18092,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -19373,162 +18154,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_uint1 + Enumerator class for CudaManagedMemory_ushort1 - + - + - + - + - + - + A variable located in managed memory. - Type: uint2 + Type: ushort2 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. 
- + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -19585,7 +18366,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -19609,7 +18390,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -19672,7 +18453,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -19734,162 +18515,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_uint2 + Enumerator class for CudaManagedMemory_ushort2 - + - + - + - + - + - + A variable located in managed memory. - Type: uint3 + Type: ushort3 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -19946,7 +18727,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -19970,7 +18751,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. 
@@ -20033,7 +18814,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -20095,162 +18876,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_uint3 + Enumerator class for CudaManagedMemory_ushort3 - + - + - + - + - + - + A variable located in managed memory. - Type: uint4 + Type: ushort4 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -20307,7 +19088,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -20331,7 +19112,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -20394,7 +19175,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -20456,162 +19237,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_uint4 + Enumerator class for CudaManagedMemory_ushort4 - + - + - + - + - + - + A variable located in managed memory. - Type: long + Type: int - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. 
The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -20668,7 +19449,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -20692,7 +19473,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -20755,7 +19536,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -20817,162 +19598,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_long + Enumerator class for CudaManagedMemory_int - + - + - + - + - + - + A variable located in managed memory. - Type: long1 + Type: int1 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. 
- + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -21029,7 +19810,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -21053,7 +19834,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -21116,7 +19897,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -21178,162 +19959,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_long1 + Enumerator class for CudaManagedMemory_int1 - + - + - + - + - + - + A variable located in managed memory. - Type: long2 + Type: int2 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -21390,7 +20171,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -21414,7 +20195,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. 
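The prefetch behaviour documented above corresponds to cuMemPrefetchAsync at the driver level. A minimal runtime-API sketch follows, assuming device 0 is the prefetch target and a dedicated stream is used; cudaCpuDeviceId plays the role of CU_DEVICE_CPU when migrating pages back to host memory.

// Hedged sketch of prefetching a managed allocation to a device and back.
#include <cuda_runtime.h>

int main()
{
    const size_t count = (1 << 20) * sizeof(float);
    float* data = nullptr;
    cudaStream_t stream;

    cudaMallocManaged(&data, count);
    cudaStreamCreate(&stream);

    // Migrate the pages to device 0 before the kernels that use them run.
    cudaMemPrefetchAsync(data, count, 0 /*dstDevice*/, stream);

    // ... enqueue kernels on `stream` that read or write `data` ...

    // Bring the pages back to CPU memory once the device is done with them.
    cudaMemPrefetchAsync(data, count, cudaCpuDeviceId, stream);
    cudaStreamSynchronize(stream);

    cudaStreamDestroy(stream);
    cudaFree(data);
    return 0;
}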
@@ -21477,7 +20258,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -21539,162 +20320,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_long2 + Enumerator class for CudaManagedMemory_int2 - + - + - + - + - + - + A variable located in managed memory. - Type: ulong + Type: int3 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -21751,7 +20532,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -21775,7 +20556,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -21838,7 +20619,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -21900,162 +20681,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_ulong + Enumerator class for CudaManagedMemory_int3 - + - + - + - + - + - + A variable located in managed memory. - Type: ulong1 + Type: int4 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. 
The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -22112,7 +20893,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -22136,7 +20917,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -22199,7 +20980,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -22261,162 +21042,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_ulong1 + Enumerator class for CudaManagedMemory_int4 - + - + - + - + - + - + A variable located in managed memory. - Type: ulong2 + Type: uint - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. 
- + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -22473,7 +21254,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -22497,7 +21278,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -22560,7 +21341,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -22622,162 +21403,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_ulong2 + Enumerator class for CudaManagedMemory_uint - + - + - + - + - + - + A variable located in managed memory. - Type: float + Type: uint1 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -22834,7 +21615,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -22858,7 +21639,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. 
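The advice flags spelled out above (CU_MEM_ADVISE_SET_READ_MOSTLY, SET_PREFERRED_LOCATION, SET_ACCESSED_BY) have direct runtime-API counterparts. A hedged sketch under the assumption of a single GPU (device 0) and a read-mostly lookup table:

// Hedged sketch of the memory-advice hints described in the documentation above.
#include <cuda_runtime.h>

int main()
{
    const size_t count = (1 << 20) * sizeof(float);
    float* lut = nullptr;
    cudaMallocManaged(&lut, count);

    // Read-mostly: the driver may keep read-only copies on every processor that reads it.
    cudaMemAdvise(lut, count, cudaMemAdviseSetReadMostly, 0);

    // Prefer to keep the physical pages on device 0; migration on fault is still allowed.
    cudaMemAdvise(lut, count, cudaMemAdviseSetPreferredLocation, 0);

    // Keep a CPU mapping so occasional host accesses do not fault.
    cudaMemAdvise(lut, count, cudaMemAdviseSetAccessedBy, cudaCpuDeviceId);

    cudaFree(lut);
    return 0;
}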
@@ -22921,7 +21702,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -22983,162 +21764,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_float + Enumerator class for CudaManagedMemory_uint1 - + - + - + - + - + - + A variable located in managed memory. - Type: float1 + Type: uint2 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -23195,7 +21976,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -23219,7 +22000,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -23282,7 +22063,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -23344,162 +22125,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_float1 + Enumerator class for CudaManagedMemory_uint2 - + - + - + - + - + - + A variable located in managed memory. - Type: float2 + Type: uint3 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. 
The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -23556,7 +22337,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -23580,7 +22361,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -23643,7 +22424,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -23705,162 +22486,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_float2 + Enumerator class for CudaManagedMemory_uint3 - + - + - + - + - + - + A variable located in managed memory. - Type: float3 + Type: uint4 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. 
- + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -23917,7 +22698,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -23941,7 +22722,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -24004,7 +22785,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -24066,162 +22847,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_float3 + Enumerator class for CudaManagedMemory_uint4 - + - + - + - + - + - + A variable located in managed memory. - Type: float4 + Type: long - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -24278,7 +23059,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -24302,7 +23083,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. 
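The stream-attachment contract documented above (length must be zero for the whole allocation, single attachment cannot target the NULL stream, host access becomes legal once the attached stream has drained) maps to cudaStreamAttachMemAsync in the runtime API. A small sketch under those assumptions, with a placeholder kernel:

// Hedged sketch of per-stream attachment of a managed allocation.
#include <cuda_runtime.h>

__global__ void touch(float* p) { p[0] += 1.0f; }

int main()
{
    float* data = nullptr;
    cudaStream_t stream;
    cudaStreamCreate(&stream);

    // Host attachment: kernels cannot use the allocation until it is attached.
    cudaMallocManaged(&data, 256 * sizeof(float), cudaMemAttachHost);

    // Restrict the allocation to `stream`; other streams must not touch it.
    cudaStreamAttachMemAsync(stream, data, 0 /*length: entire allocation*/, cudaMemAttachSingle);
    touch<<<1, 1, 0, stream>>>(data);

    // Once the attached stream has completed, the host may access the data again.
    cudaStreamSynchronize(stream);
    data[0] += 1.0f;

    cudaStreamDestroy(stream);
    cudaFree(data);
    return 0;
}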
@@ -24365,7 +23146,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -24427,162 +23208,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_float4 + Enumerator class for CudaManagedMemory_long - + - + - + - + - + - + A variable located in managed memory. - Type: double + Type: long1 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -24639,7 +23420,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -24663,7 +23444,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -24726,7 +23507,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -24788,162 +23569,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_double + Enumerator class for CudaManagedMemory_long1 - + - + - + - + - + - + A variable located in managed memory. - Type: double1 + Type: long2 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. 
The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -25000,7 +23781,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -25024,7 +23805,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -25087,7 +23868,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -25149,162 +23930,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_double1 + Enumerator class for CudaManagedMemory_long2 - + - + - + - + - + - + A variable located in managed memory. - Type: double2 + Type: ulong - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. 
- + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -25361,7 +24142,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -25385,7 +24166,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -25448,7 +24229,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -25510,162 +24291,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_double2 + Enumerator class for CudaManagedMemory_ulong - + - + - + - + - + - + A variable located in managed memory. - Type: cuDoubleComplex + Type: ulong1 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -25722,7 +24503,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -25746,7 +24527,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. 
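The pointer attributes listed above (physical location, device and host addresses, whether the pointer refers to managed memory) can be queried through cudaPointerGetAttributes. A hedged sketch, assuming a CUDA 10 or newer toolkit where the attributes struct exposes a single type field rather than the older memoryType/isManaged pair:

// Hedged sketch of querying the attributes of a managed pointer.
#include <cuda_runtime.h>
#include <cstdio>

int main()
{
    float* data = nullptr;
    cudaMallocManaged(&data, 1024 * sizeof(float));

    cudaPointerAttributes attr;
    cudaPointerGetAttributes(&attr, data);

    printf("managed:        %s\n", attr.type == cudaMemoryTypeManaged ? "yes" : "no");
    printf("device:         %d\n", attr.device);
    printf("device pointer: %p\n", attr.devicePointer);
    printf("host pointer:   %p\n", attr.hostPointer);

    cudaFree(data);
    return 0;
}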
@@ -25809,7 +24590,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -25871,162 +24652,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_cuDoubleComplex + Enumerator class for CudaManagedMemory_ulong1 - + - + - + - + - + - + A variable located in managed memory. - Type: cuDoubleReal + Type: ulong2 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -26083,7 +24864,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -26107,7 +24888,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -26170,7 +24951,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -26232,162 +25013,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_cuDoubleReal + Enumerator class for CudaManagedMemory_ulong2 - + - + - + - + - + - + A variable located in managed memory. - Type: cuFloatComplex + Type: float - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. 
- + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -26444,7 +25225,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -26468,7 +25249,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -26531,7 +25312,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -26593,162 +25374,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_cuFloatComplex + Enumerator class for CudaManagedMemory_float - + - + - + - + - + - + A variable located in managed memory. - Type: cuFloatReal + Type: float1 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. 
managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -26805,7 +25586,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. devPtr is the @@ -26829,7 +25610,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -26892,7 +25673,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -26954,162 +25735,162 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_cuFloatReal + Enumerator class for CudaManagedMemory_float1 - + - + - + - + - + - + A variable located in managed memory. - Type: dim3 + Type: float2 - + Creates a new CudaManagedMemory and allocates the memory on host/device. In elements - + Creates a new CudaManagedMemory from definition in cu-file. The module where the variable is defined in. The variable name as defined in the cu-file. - + Creates a new CudaManagedMemory from definition in cu-file. The kernel which module defines the variable. The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - + UIntPtr to managed memory. - + CUdeviceptr to managed memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. managed variable newly allocated host variable with value from managed memory - + The on which a pointer was allocated or registered - + The describing the physical location of a pointer - + The address at which a pointer's memory may be accessed on the device Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + The address at which a pointer's memory may be accessed on the host - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + Synchronize every synchronous memory operation initiated on this region - + A process-wide unique ID for an allocated memory region - + Indicates if the pointer points to managed memory - + Attach memory to a stream asynchronously @@ -27166,7 +25947,7 @@ Must be one of - + Prefetches memory to the specified destination device Prefetches memory to the specified destination device. 
devPtr is the @@ -27190,7 +25971,7 @@ Stream to enqueue prefetch operation Note that this function is asynchronous with respect to the host and all work on other devices. - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -27253,7 +26034,7 @@ Advice to be applied for the specified memory range Device to apply the advice for - + Advise about the usage of a given memory range Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. @@ -27315,3423 +26096,4604 @@ Advice to be applied for the specified memory range Device to apply the advice for - + - Enumerator class for CudaManagedMemory_dim3 + Enumerator class for CudaManagedMemory_float2 - + - + - + - + - + - + - Number of channels in array + A variable located in managed memory. + Type: float3 - + - One channel, e.g. float1, int1, float, int + Creates a new CudaManagedMemory and allocates the memory on host/device. + In elements + - + - Two channels, e.g. float2, int2 + Creates a new CudaManagedMemory from definition in cu-file. + The module where the variable is defined in. + The variable name as defined in the cu-file. - + - Four channels, e.g. float4, int4 + Creates a new CudaManagedMemory from definition in cu-file. + The kernel which module defines the variable. + The variable name as defined in the cu-file. - + - A mipmapped Cuda array + For dispose - + - Creates a CUDA mipmapped array according to descriptor. - Width, Height, and Depth are the width, height, and depth of the CUDA array (in elements); the following - types of CUDA arrays can be allocated: - – A 1D mipmapped array is allocated if Height and Depth extents are both zero. - – A 2D mipmapped array is allocated if only Depth extent is zero. - – A 3D mipmapped array is allocated if all three extents are non-zero. - – A 1D layered CUDA mipmapped array is allocated if only Height is zero and the - flag is set. Each layer is a 1D array. The number of layers is determined by the depth extent. - – A 2D layered CUDA mipmapped array is allocated if all three extents are non-zero and the - flag is set. Each layer is a 2D array. The number of layers is determined by the depth extent. - – A cubemap CUDA mipmapped array is allocated if all three extents are non-zero and the - flag is set. Width must be equal to Height, and Depth must be six. A - cubemap is a special type of 2D layered CUDA array, where the six layers represent the six faces of a - cube. The order of the six layers in memory is the same as that listed in CUarray_cubemap_face. - – A cubemap layered CUDA mipmapped array is allocated if all three extents are non-zero, and both, - and flags are set. Width must be equal - to Height, and Depth must be a multiple of six. A cubemap layered CUDA array is a special type of - 2D layered CUDA array that consists of a collection of cubemaps. The first six layers represent the first - cubemap, the next six layers form the second cubemap, and so on. - Flags may be set to: - – to enable creation of layered CUDA mipmapped arrays. If this flag is set, - Depth specifies the number of layers, not the depth of a 3D array. - – to enable creation of mipmapped cubemaps. If this flag is set, Width - must be equal to Height, and Depth must be six. If the CUDA_ARRAY3D_LAYERED flag is also set, - then Depth must be a multiple of six. 
- – to indicate that the CUDA mipmapped array will be used for - texture gather. Texture gather can only be performed on 2D CUDA mipmapped arrays. + Dispose - mipmapped array descriptor - Number of mipmap levels. This value is clamped to the range [1, 1 + floor(log2(max(width, height, depth)))] - + - Creates a CUDA mipmapped array according to descriptor. - Width, Height, and Depth are the width, height, and depth of the CUDA array (in elements); the following - types of CUDA arrays can be allocated: - – A 1D mipmapped array is allocated if Height and Depth extents are both zero. - – A 2D mipmapped array is allocated if only Depth extent is zero. - – A 3D mipmapped array is allocated if all three extents are non-zero. - – A 1D layered CUDA mipmapped array is allocated if only Height is zero and the - flag is set. Each layer is a 1D array. The number of layers is determined by the depth extent. - – A 2D layered CUDA mipmapped array is allocated if all three extents are non-zero and the - flag is set. Each layer is a 2D array. The number of layers is determined by the depth extent. - – A cubemap CUDA mipmapped array is allocated if all three extents are non-zero and the - flag is set. Width must be equal to Height, and Depth must be six. A - cubemap is a special type of 2D layered CUDA array, where the six layers represent the six faces of a - cube. The order of the six layers in memory is the same as that listed in CUarray_cubemap_face. - – A cubemap layered CUDA mipmapped array is allocated if all three extents are non-zero, and both, - and flags are set. Width must be equal - to Height, and Depth must be a multiple of six. A cubemap layered CUDA array is a special type of - 2D layered CUDA array that consists of a collection of cubemaps. The first six layers represent the first - cubemap, the next six layers form the second cubemap, and so on. + For IDisposable - Array format - Array width. See general description. - Array height. See general description. - Array depth or layer count. See general description. - number of channels - Flags may be set to: - – to enable creation of layered CUDA mipmapped arrays. If this flag is set, - Depth specifies the number of layers, not the depth of a 3D array. - – to enable creation of mipmapped cubemaps. If this flag is set, Width - must be equal to Height, and Depth must be six. If the CUDA_ARRAY3D_LAYERED flag is also set, - then Depth must be a multiple of six. - – to indicate that the CUDA mipmapped array will be used for - texture gather. Texture gather can only be performed on 2D CUDA mipmapped arrays. - Number of mipmap levels. This value is clamped to the range [1, 1 + floor(log2(max(width, height, depth)))] + - + - Creates a CUDA mipmapped array from an existing mipmap array handle. + UIntPtr to managed memory. - handle to wrap - Array format of the wrapped array. Cannot be gathered through CUDA API. - Number of channels of wrapped array. - + - Dispose + CUdeviceptr to managed memory. - + - For IDisposable + Size in bytes - - + - Returns a CUDA array that represents a single mipmap level - of the CUDA mipmapped array. + Size in elements - Mipmap level - + - Returns a CUDA array that represents a single mipmap level - of the CUDA mipmapped array. + Access array per element. - Mipmap level + index in elements + - + - Returns a CUDA array that represents a single mipmap level - of the CUDA mipmapped array. + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
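The removed documentation above walks through creating a mipmapped CUDA array from a descriptor and then fetching individual mip levels from it. A runtime-API sketch of the same flow, with the format, extent and level count chosen purely for illustration:

// Hedged sketch of mipmapped-array creation and per-level access.
#include <cuda_runtime.h>

int main()
{
    // 2D mipmapped array of float, 512x512, 4 levels (depth = 0: neither 3D nor layered).
    cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
    cudaExtent extent = make_cudaExtent(512, 512, 0);

    cudaMipmappedArray_t mipmap = nullptr;
    cudaMallocMipmappedArray(&mipmap, &desc, extent, 4 /*numLevels*/, 0 /*flags*/);

    // Counterpart of "returns a CUDA array that represents a single mipmap level".
    cudaArray_t level0 = nullptr;
    cudaGetMipmappedArrayLevel(&level0, mipmap, 0);

    // ... fill level0 with cudaMemcpy2DToArray, bind it to a texture object, etc. ...

    cudaFreeMipmappedArray(mipmap);
    return 0;
}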
- Mipmap level - + - Returns a CUDA array that represents a single mipmap level - of the CUDA mipmapped array. + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. - Mipmap level + managed variable + newly allocated host variable with value from managed memory - + - Returns the wrapped CUmipmappedArray + The on which a pointer was allocated or registered - + - Returns the wrapped CUDAArray3DDescriptor + The describing the physical location of a pointer - + - Returns the Depth of the array + The address at which a pointer's memory may be accessed on the device + Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + - Returns the Height of the array + The address at which a pointer's memory may be accessed on the host - + - Returns the array width in elements + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + - Returns the array creation flags + Synchronize every synchronous memory operation initiated on this region - + - Returns the array format + A process-wide unique ID for an allocated memory region - + - Returns number of channels + Indicates if the pointer points to managed memory - + - If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + Attach memory to a stream asynchronously + + Enqueues an operation in hStream to specify stream association of + length bytes of memory starting from dptr. This function is a + stream-ordered operation, meaning that it is dependent on, and will + only take effect when, previous work in stream has completed. Any + previous association is automatically replaced. + + dptr must point to an address within managed memory space declared + using the __managed__ keyword or allocated with cuMemAllocManaged. + + length must be zero, to indicate that the entire allocation's + stream association is being changed. Currently, it's not possible + to change stream association for a portion of an allocation. + + The stream association is specified using flags which must be + one of . + If the flag is specified, the memory can be accessed + by any stream on any device. + If the flag is specified, the program makes a guarantee + that it won't access the memory on the device from any stream. + If the flag is specified, the program makes a guarantee + that it will only access the memory on the device from hStream. It is illegal + to attach singly to the NULL stream, because the NULL stream is a virtual global + stream and not a specific stream. An error will be returned in this case. + + When memory is associated with a single stream, the Unified Memory system will + allow CPU access to this memory region so long as all operations in hStream + have completed, regardless of whether other streams are active. In effect, + this constrains exclusive ownership of the managed memory region by + an active GPU to per-stream activity instead of whole-GPU activity. + + Accessing memory on the device from streams that are not associated with + it will produce undefined results. No error checking is performed by the + Unified Memory system to ensure that kernels launched into other streams + do not access this region. + + It is a program's responsibility to order calls to + via events, synchronization or other means to ensure legal access to memory + at all times. Data visibility and coherency will be changed appropriately + for all kernels which follow a stream-association change. 
[The stream-attach remarks conclude: if the associated stream is destroyed, the allocation reverts to its default (global) visibility once the stream's pending work completes; the parameters are the stream, the length (must be zero) and the attach flag. The PrefetchAsync entry describes cuMemPrefetchAsync: prefetch the region to a destination device (or CU_DEVICE_CPU) on a stream, populating and mapping it on the destination and possibly evicting other pages; the call is asynchronous with respect to the host and all other devices. The MemAdvise entry describes cuMemAdvise with the advice values CU_MEM_ADVISE_SET/UNSET_READ_MOSTLY, SET/UNSET_PREFERRED_LOCATION and SET/UNSET_ACCESSED_BY, including the notes on read-duplication, preferred-location-driven migration and eviction, thrash handling, and mapping without migration. Interleaved removed entries (-) carry the header comments of the CudaOccupancy port: "Cuda occupancy from CudaOccupancy.h", "mirror the type and spelling of cudaDeviceProp's members, keep these alphabetized", and "define our own cudaOccFuncAttributes to stay consistent with the original header file".]
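The added entries above describe the whole unified-memory workflow (allocate, advise, prefetch, attach) only in prose, so a small sketch may help. It uses CudaManagedMemory_double, which this diff names, together with ManagedCuda's CudaContext and CudaStream; the method and enum spellings (MemAdvise, PrefetchAsync, StreamAttachMemAsync, CUmemAdvise, CUmemAttach_flags, ctx.Device) are assumptions inferred from the summaries above, not verified against the referenced ManagedCuda build.

using ManagedCuda;
using ManagedCuda.BasicTypes;

static class ManagedMemorySketch
{
    public static void Run()
    {
        using (var ctx = new CudaContext(0))                 // device 0
        using (var stream = new CudaStream())
        using (var data = new CudaManagedMemory_double(1 << 20, CUmemAttach_flags.Global))
        {
            // Host-side initialisation through the per-element indexer.
            for (int i = 0; i < 1024; i++)
                data[i] = i;

            // cuMemAdvise: the region will mostly be read on the GPU (assumed wrapper call).
            data.MemAdvise(CUmemAdvise.SetReadMostly, ctx.Device);

            // cuMemPrefetchAsync: migrate the pages to the GPU before kernels run (assumed wrapper call).
            data.PrefetchAsync(ctx.Device, stream.Stream);

            // cuStreamAttachMemAsync: length must be zero (whole allocation);
            // Single restricts device-side access to this stream.
            data.StreamAttachMemAsync(stream.Stream, 0, CUmemAttach_flags.Single);

            stream.Synchronize();
        }
    }
}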
[The MemAdvise remarks are repeated for the overload that takes the managed-memory variable itself instead of a raw pointer and byte count, followed by the enumerator class for CudaManagedMemory_float3. From here on the hunks interleave removed CudaOccupancy entries (-) with added CudaManagedMemory_float4 entries (+). Removed: cudaOccFuncAttributes ("only the static part shared memory, without dynamic allocations"), the occupancy error types (invalid input parameter; device not supported in the current implementation or invalid), the function cache configurations (no preference, prefer shared memory, prefer L1, prefer equal), the occupancy limiting factors (warps, registers, shared memory, blocks), and the partitioned global caching support and option values (not supported/supported; disable/prefer, with a note that the Pascal-only parts should be removed before export to the toolkit). Added: the CudaManagedMemory_float4 summary, the element-count constructor, the constructors that bind a __managed__ variable defined in a cu-file via a module or a kernel, Dispose/IDisposable, the pointer and size accessors, the indexer, the ownership note, the host-value conversion, and the first pointer-attribute properties.]
[The interleaving continues. Removed occupancy entries (-): force partitioned global caching, the per-function opt-in dynamic shared memory limit (default versus opt-in), the shared-memory carveout configurations (no preference, maximum shared memory, maximum L1, half and half), "Active Thread Blocks per Multiprocessor", the cudaOccDeviceState note (declare the struct zeroed so fields added for later GPUs default safely), the shared-memory alignment helpers (alignment by compute major version, the Volta carveout API, the user-requested config, and the per-block limit derived from the function config), partitioned global caching mode support, the maximum-CTAs-per-SM calculation ("equivalent to the calculation done in the CUDA Occupancy Calculator spreadsheet"), the delegate that maps a block size to its dynamic shared memory size (x => 0 when none is used, x => 4 * x for four bytes per thread), and the CudaOccupancyException thrown when an occupancy API call does not return 0, together with its status-checking helper. Added entries (+): the remaining CudaManagedMemory_float4 pointer attributes, the full StreamAttachMemAsync / PrefetchAsync / MemAdvise remarks (both overloads) repeated for that type, its enumerator class, and the opening docs for CudaManagedMemory_double (summary, constructors, Dispose). The removed side then begins the CudaPageLockedHostMemory2D_byte entries: "a variable located in page-locked (pinned) host memory, use this type of variable for asynchronous memcpy", with constructors taking width, pitch and height or width and height (pitch assumed to be width * sizeof(byte)), with or without cuMemHostAlloc flags.]
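The removed occupancy entries describe a callback that maps a launch block size to the dynamic shared memory it needs, and give the two examples quoted above. Written out in C#, with Func<int, int> standing in for the wrapper's own delegate type, whose name this hunk does not show:

using System;

static class OccupancySMemCallbacks
{
    // No dynamic shared memory:
    public static readonly Func<int, int> NoDynamicSharedMem = blockSize => 0;

    // 4 bytes of dynamic shared memory per thread:
    public static readonly Func<int, int> FourBytesPerThread = blockSize => 4 * blockSize;
}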
[Removed CudaPageLockedHostMemory2D_byte entries (-): Dispose/IDisposable, the pinned host pointer, width, height, pitch, size and element size, the [x, y] indexer, the synchronous and asynchronous copies to and from 2D arrays, linear device memory and pitched device memory, and the CUdeviceptr of the mapped pinned buffer (valid only when the context was created with the mapped-host flag). Added CudaManagedMemory_double entries (+): the pointer and size accessors, indexer, ownership note, host-value conversion, pointer attributes, the StreamAttachMemAsync and PrefetchAsync remarks, and the start of the MemAdvise remarks.]
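The removed page-locked entries list the full copy surface but, like the rest of this hunk, without member names. Below is a hedged sketch of the intended use, pairing the pinned 2D host buffer with a pitched device allocation: CudaPageLockedHostMemory2D_byte and the "pitch defaults to width * sizeof(byte)" constructor are taken from this diff, while CudaPitchedDeviceVariable<byte> and the copy-method names (AsyncCopyToDevice, SynchronCopyFromDevice) are assumed from the "Asynchron/Synchron copy" summaries rather than verified signatures.

using ManagedCuda;

static class PinnedHostMemorySketch
{
    public static void Run()
    {
        const int width = 512, height = 512;

        using (var ctx = new CudaContext(0))
        using (var stream = new CudaStream())
        // Pinned host buffer; pitch defaults to width * sizeof(byte) per the docs above.
        using (var host = new CudaPageLockedHostMemory2D_byte(width, height))
        // Matching pitched allocation on the device.
        using (var device = new CudaPitchedDeviceVariable<byte>(width, height))
        {
            // Fill the host buffer through the [x, y] indexer.
            for (int y = 0; y < height; y++)
                for (int x = 0; x < width; x++)
                    host[x, y] = (byte)(x ^ y);

            // Pinned memory lets this copy overlap other work queued on the stream.
            host.AsyncCopyToDevice(device, stream.Stream);   // assumed method name
            stream.Synchronize();

            // ... launch kernels against 'device', then read the result back:
            host.SynchronCopyFromDevice(device);             // assumed method name
        }
    }
}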
[The MemAdvise remarks for CudaManagedMemory_double continue through both overloads (raw pointer plus byte count, and the managed variable itself), ending with the allocation-flags accessor of the pinned buffer (-) and the enumerator classes for CudaPageLockedHostMemory2D_byte (-) and CudaManagedMemory_double (+). The hunks then open the next pair of types: removed CudaPageLockedHostMemory2D_uchar1 entries (constructors with and without explicit pitch or flags, Dispose, pinned pointer, width, height, pitch, size, element size) interleaved with added CudaManagedMemory_double1 entries (summary, element-count constructor, cu-file constructors via module or kernel, Dispose/IDisposable, pointer and size accessors, indexer).]
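The enumerator entries and the "converts a managed variable to a host value" entry above suggest the managed types are directly enumerable and implicitly convertible to a single host value (the first element). A small sketch of that reading, with the conversion operator and enumerator support assumed rather than verified:

using ManagedCuda;
using ManagedCuda.BasicTypes;

static class ManagedEnumerationSketch
{
    public static void Run()
    {
        using (var ctx = new CudaContext(0))
        using (var vec = new CudaManagedMemory_double(16, CUmemAttach_flags.Global))
        {
            for (int i = 0; i < 16; i++)
                vec[i] = 0.5 * i;

            double first = vec;        // implicit conversion: first element only (assumed)
            double sum = 0;
            foreach (double v in vec)  // via the generated enumerator class (assumed)
                sum += v;
        }
    }
}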
[Same pattern for this pair: the CudaManagedMemory_double1 docs continue with the ownership note, host-value conversion, pointer attributes and the full StreamAttachMemAsync / PrefetchAsync / MemAdvise remarks (both overloads), while the removed side lists the CudaPageLockedHostMemory2D_uchar1 indexer, the synchronous and asynchronous copy methods, the mapped device pointer, the allocation-flags accessor and the enumerator class.]
Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. + + Note that this function is asynchronous with respect to the host and all work + on other devices. - + managed memory variable + Advice to be applied for the specified memory range + Device to apply the advice for - + - Enumerator class for CudaPageLockedHostMemory2D_uchar1 + Enumerator class for CudaManagedMemory_double1 - + - + - + - + - + - - - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uchar2 - - - + - Creates a new CudaPageLockedHostMemory2D_uchar2 and allocates the memory on host. Using cuMemHostAlloc + A variable located in managed memory. + Type: double2 - In elements - Width including alignment in bytes - In elements - - + - Creates a new CudaPageLockedHostMemory2D_uchar2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaManagedMemory and allocates the memory on host/device. - In elements - Width including alignment in bytes - In elements + In elements + - + - Creates a new CudaPageLockedHostMemory2D_uchar2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar2). Using cuMemHostAlloc without flags. + Creates a new CudaManagedMemory from definition in cu-file. - In elements - In elements + The module where the variable is defined in. + The variable name as defined in the cu-file. - + - Creates a new CudaPageLockedHostMemory2D_uchar2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar2). Using cuMemHostAlloc. + Creates a new CudaManagedMemory from definition in cu-file. - In elements - In elements - + The kernel which module defines the variable. + The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - - - Pointer to pinned host memory. - - - - - Width in elements - - - + - Height in elements + UIntPtr to managed memory. - + - Pitch in bytes + CUdeviceptr to managed memory. - + Size in bytes - + - Type size in bytes + Size in elements - + Access array per element. 
- X-index in elements - Y-index in elements + index in elements - + - Synchron copy host to 2D Array + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - - + - Synchron copy host to 2D Array + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. - + managed variable + newly allocated host variable with value from managed memory - + - Synchron copy 2D Array to host + The on which a pointer was allocated or registered - - + - Synchron copy 2D Array to host + The describing the physical location of a pointer - - + - Synchron copy host to device + The address at which a pointer's memory may be accessed on the device + Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - - + - Synchron copy host to device + The address at which a pointer's memory may be accessed on the host - - + - Synchron copy device to host + A pair of tokens for use with the nv-p2p.h Linux kernel interface - - + - Synchron copy device to host + Synchronize every synchronous memory operation initiated on this region - - + - Synchron Copy host to pitched device + A process-wide unique ID for an allocated memory region - - - + - Synchron Copy host to pitched device + Indicates if the pointer points to managed memory - - + - Synchron copy device to host + Attach memory to a stream asynchronously + + Enqueues an operation in hStream to specify stream association of + length bytes of memory starting from dptr. This function is a + stream-ordered operation, meaning that it is dependent on, and will + only take effect when, previous work in stream has completed. Any + previous association is automatically replaced. + + dptr must point to an address within managed memory space declared + using the __managed__ keyword or allocated with cuMemAllocManaged. + + length must be zero, to indicate that the entire allocation's + stream association is being changed. Currently, it's not possible + to change stream association for a portion of an allocation. + + The stream association is specified using flags which must be + one of . + If the flag is specified, the memory can be accessed + by any stream on any device. + If the flag is specified, the program makes a guarantee + that it won't access the memory on the device from any stream. + If the flag is specified, the program makes a guarantee + that it will only access the memory on the device from hStream. It is illegal + to attach singly to the NULL stream, because the NULL stream is a virtual global + stream and not a specific stream. An error will be returned in this case. + + When memory is associated with a single stream, the Unified Memory system will + allow CPU access to this memory region so long as all operations in hStream + have completed, regardless of whether other streams are active. In effect, + this constrains exclusive ownership of the managed memory region by + an active GPU to per-stream activity instead of whole-GPU activity. + + Accessing memory on the device from streams that are not associated with + it will produce undefined results. No error checking is performed by the + Unified Memory system to ensure that kernels launched into other streams + do not access this region. + + It is a program's responsibility to order calls to + via events, synchronization or other means to ensure legal access to memory + at all times. 
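A hedged sketch of one way to satisfy that ordering requirement with an event, under assumed ManagedCuda names (CudaEvent.Record, CudaStream.WaitEvent); substitute whatever synchronization primitive the surrounding code already uses.

    using ManagedCuda;

    static class StreamOrderingSketch
    {
        // 'producer' owns the managed region via StreamAttachMemAsync; 'consumer'
        // (or the host) must not touch it before the producer's work has finished.
        public static void OrderAccess(CudaStream producer, CudaStream consumer)
        {
            var done = new CudaEvent();
            done.Record(producer.Stream);     // mark the point where the producer's writes complete
            consumer.WaitEvent(done.Event);   // consumer kernels wait on the GPU; the host is not blocked
        }
    }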
Data visibility and coherency will be changed appropriately + for all kernels which follow a stream-association change. + + If hStream is destroyed while data is associated with it, the association is + removed and the association reverts to the default visibility of the allocation + as specified at cuMemAllocManaged. For __managed__ variables, the default + association is always . Note that destroying a stream is an + asynchronous operation, and as a result, the change to default association won't + happen until all work in the stream has completed. + - - - - - - Synchron copy device to host - - - - - - Asynchron copy host to 2D Array - - - - - - - Asynchron copy host to 2D Array - - - - - - - Asynchron copy 2D Array to host - - - - - - - Asynchron copy 2D Array to host - - - - - - - Asynchron Copy host to device - - - - - - - Asynchron copy device to host - - - - - - - Asynchron Copy host to device - - - - - - - Asynchron copy device to host - - - - - - - Asynchron Copy host to pitched device - - - - - - - - Asynchron Copy host to pitched device - - - - - - - Asynchron copy device to host - - - - + Stream in which to enqueue the attach operation + Length of memory (must be zero) + Must be one of + - + - Asynchron copy device to host + Prefetches memory to the specified destination device + Prefetches memory to the specified destination device. devPtr is the + base device pointer of the memory to be prefetched and dstDevice is the + destination device. count specifies the number of bytes to copy. hStream + is the stream in which the operation is enqueued. + + Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. + + If no physical memory has been allocated for this region, then this memory region + will be populated and mapped on the destination device. If there's insufficient + memory to prefetch the desired region, the Unified Memory driver may evict pages + belonging to other memory regions to make room. If there's no memory that can be + evicted, then the Unified Memory driver will prefetch less than what was requested. + + In the normal case, any mappings to the previous location of the migrated pages are + removed and mappings for the new location are only setup on the dstDevice. + The application can exercise finer control on these mappings using ::cudaMemAdvise. - - + Destination device to prefetch to + Stream to enqueue prefetch operation + Note that this function is asynchronous with respect to the host and all work on other devices. - + - Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. 
Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. 
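As a sketch of combining the preferred-location advice with an explicit prefetch (PrefetchAsync, MemAdvise and the CUmemAdvise members follow the summaries above, but the exact signatures are assumptions):

    using ManagedCuda;
    using ManagedCuda.BasicTypes;

    static class PreferredLocationSketch
    {
        // 'gpu' is the CUdevice the data should normally live on.
        public static void Run(CUdevice gpu)
        {
            var buf = new CudaManagedMemory_double(1 << 20, CUmemAttach_flags.Global);
            var stream = new CudaStream();

            // Keep the pages on 'gpu' when possible; faults elsewhere will prefer
            // mapping over migration as long as a direct mapping can be established.
            buf.MemAdvise(CUmemAdvise.SetPreferredLocation, gpu);

            // Populate/migrate the whole range on the GPU before the first kernel launch.
            buf.PrefetchAsync(gpu, stream.Stream);

            // ... enqueue kernels on 'stream' that use buf.DevicePointer ...

            stream.Synchronize();
            buf.Dispose();
            stream.Dispose();
        }
    }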
+ + Note that this function is asynchronous with respect to the host and all work + on other devices. - Device Pointer + Pointer to memory to set the advice for + Size in bytes of the memory range + Advice to be applied for the specified memory range + Device to apply the advice for - + - Passes back the flags that were specified when allocating the pinned host buffer + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. 
Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. + + Note that this function is asynchronous with respect to the host and all work + on other devices. - + managed memory variable + Advice to be applied for the specified memory range + Device to apply the advice for - + - Enumerator class for CudaPageLockedHostMemory2D_uchar2 + Enumerator class for CudaManagedMemory_double2 - + - + - + - + - + - - - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uchar3 - - - + - Creates a new CudaPageLockedHostMemory2D_uchar3 and allocates the memory on host. Using cuMemHostAlloc + A variable located in managed memory. + Type: cuDoubleComplex - In elements - Width including alignment in bytes - In elements - - + - Creates a new CudaPageLockedHostMemory2D_uchar3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaManagedMemory and allocates the memory on host/device. - In elements - Width including alignment in bytes - In elements + In elements + - + - Creates a new CudaPageLockedHostMemory2D_uchar3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar3). Using cuMemHostAlloc without flags. + Creates a new CudaManagedMemory from definition in cu-file. - In elements - In elements + The module where the variable is defined in. + The variable name as defined in the cu-file. - + - Creates a new CudaPageLockedHostMemory2D_uchar3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar3). Using cuMemHostAlloc. + Creates a new CudaManagedMemory from definition in cu-file. - In elements - In elements - + The kernel which module defines the variable. + The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - - - Pointer to pinned host memory. - - - - - Width in elements - - - + - Height in elements + UIntPtr to managed memory. - + - Pitch in bytes + CUdeviceptr to managed memory. - + Size in bytes - + - Type size in bytes + Size in elements - + Access array per element. 
- X-index in elements - Y-index in elements + index in elements - - - Synchron copy host to 2D Array - - - - - - Synchron copy host to 2D Array - - - - - - Synchron copy 2D Array to host - - - - - - Synchron copy 2D Array to host - - - - - - Synchron copy host to device - - - - - - Synchron copy host to device - - - - + - Synchron copy device to host + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - - + - Synchron copy device to host + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. - + managed variable + newly allocated host variable with value from managed memory - + - Synchron Copy host to pitched device + The on which a pointer was allocated or registered - - - + - Synchron Copy host to pitched device + The describing the physical location of a pointer - - + - Synchron copy device to host + The address at which a pointer's memory may be accessed on the device + Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - - - + - Synchron copy device to host + The address at which a pointer's memory may be accessed on the host - - + - Asynchron copy host to 2D Array + A pair of tokens for use with the nv-p2p.h Linux kernel interface - - - + - Asynchron copy host to 2D Array + Synchronize every synchronous memory operation initiated on this region - - - + - Asynchron copy 2D Array to host + A process-wide unique ID for an allocated memory region - - - + - Asynchron copy 2D Array to host + Indicates if the pointer points to managed memory - - - + - Asynchron Copy host to device + Attach memory to a stream asynchronously + + Enqueues an operation in hStream to specify stream association of + length bytes of memory starting from dptr. This function is a + stream-ordered operation, meaning that it is dependent on, and will + only take effect when, previous work in stream has completed. Any + previous association is automatically replaced. + + dptr must point to an address within managed memory space declared + using the __managed__ keyword or allocated with cuMemAllocManaged. + + length must be zero, to indicate that the entire allocation's + stream association is being changed. Currently, it's not possible + to change stream association for a portion of an allocation. + + The stream association is specified using flags which must be + one of . + If the flag is specified, the memory can be accessed + by any stream on any device. + If the flag is specified, the program makes a guarantee + that it won't access the memory on the device from any stream. + If the flag is specified, the program makes a guarantee + that it will only access the memory on the device from hStream. It is illegal + to attach singly to the NULL stream, because the NULL stream is a virtual global + stream and not a specific stream. An error will be returned in this case. + + When memory is associated with a single stream, the Unified Memory system will + allow CPU access to this memory region so long as all operations in hStream + have completed, regardless of whether other streams are active. In effect, + this constrains exclusive ownership of the managed memory region by + an active GPU to per-stream activity instead of whole-GPU activity. + + Accessing memory on the device from streams that are not associated with + it will produce undefined results. 
No error checking is performed by the + Unified Memory system to ensure that kernels launched into other streams + do not access this region. + + It is a program's responsibility to order calls to + via events, synchronization or other means to ensure legal access to memory + at all times. Data visibility and coherency will be changed appropriately + for all kernels which follow a stream-association change. + + If hStream is destroyed while data is associated with it, the association is + removed and the association reverts to the default visibility of the allocation + as specified at cuMemAllocManaged. For __managed__ variables, the default + association is always . Note that destroying a stream is an + asynchronous operation, and as a result, the change to default association won't + happen until all work in the stream has completed. + - - + Stream in which to enqueue the attach operation + Length of memory (must be zero) + Must be one of + - + - Asynchron copy device to host + Prefetches memory to the specified destination device + Prefetches memory to the specified destination device. devPtr is the + base device pointer of the memory to be prefetched and dstDevice is the + destination device. count specifies the number of bytes to copy. hStream + is the stream in which the operation is enqueued. + + Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. + + If no physical memory has been allocated for this region, then this memory region + will be populated and mapped on the destination device. If there's insufficient + memory to prefetch the desired region, the Unified Memory driver may evict pages + belonging to other memory regions to make room. If there's no memory that can be + evicted, then the Unified Memory driver will prefetch less than what was requested. + + In the normal case, any mappings to the previous location of the migrated pages are + removed and mappings for the new location are only setup on the dstDevice. + The application can exercise finer control on these mappings using ::cudaMemAdvise. - - + Destination device to prefetch to + Stream to enqueue prefetch operation + Note that this function is asynchronous with respect to the host and all work on other devices. - + - Asynchron Copy host to device - - - - - - - Asynchron copy device to host - - - - - - - Asynchron Copy host to pitched device - - - - - - - - Asynchron Copy host to pitched device - - - - - - - Asynchron copy device to host - - - - - - - - Asynchron copy device to host - - - - - - - Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. 
+ - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. 
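For the accessed-by advice, a short sketch of the multi-GPU scenario described above; peer-to-peer access, the second context and the Device property are assumptions made for illustration.

    using ManagedCuda;
    using ManagedCuda.BasicTypes;

    static class AccessedBySketch
    {
        // Assumed setup: two GPUs with peer-to-peer access enabled.
        public static void Run()
        {
            var ctx0 = new CudaContext(0);   // does the bulk of the work
            var ctx1 = new CudaContext(1);   // reads the data occasionally
            var shared = new CudaManagedMemory_double(4096, CUmemAttach_flags.Global);

            // Keep device 1's page tables mapped to 'shared' so its occasional reads
            // do not fault, without migrating the data away from device 0.
            shared.MemAdvise(CUmemAdvise.SetAccessedBy, ctx1.Device);     // Device property assumed

            // ... launch work on both contexts ...

            shared.MemAdvise(CUmemAdvise.UnsetAccessedBy, ctx1.Device);   // mappings may now be dropped
            shared.Dispose();
            ctx1.Dispose();
            ctx0.Dispose();
        }
    }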
+ + Note that this function is asynchronous with respect to the host and all work + on other devices. - Device Pointer + Pointer to memory to set the advice for + Size in bytes of the memory range + Advice to be applied for the specified memory range + Device to apply the advice for - + - Passes back the flags that were specified when allocating the pinned host buffer + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. 
Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. + + Note that this function is asynchronous with respect to the host and all work + on other devices. - + managed memory variable + Advice to be applied for the specified memory range + Device to apply the advice for - + - Enumerator class for CudaPageLockedHostMemory2D_uchar3 + Enumerator class for CudaManagedMemory_cuDoubleComplex - + - + - + - + - + - - - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uchar4 - - - + - Creates a new CudaPageLockedHostMemory2D_uchar4 and allocates the memory on host. Using cuMemHostAlloc + A variable located in managed memory. + Type: cuDoubleReal - In elements - Width including alignment in bytes - In elements - - + - Creates a new CudaPageLockedHostMemory2D_uchar4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaManagedMemory and allocates the memory on host/device. - In elements - Width including alignment in bytes - In elements + In elements + - + - Creates a new CudaPageLockedHostMemory2D_uchar4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar4). Using cuMemHostAlloc without flags. + Creates a new CudaManagedMemory from definition in cu-file. - In elements - In elements + The module where the variable is defined in. + The variable name as defined in the cu-file. - + - Creates a new CudaPageLockedHostMemory2D_uchar4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar4). Using cuMemHostAlloc. + Creates a new CudaManagedMemory from definition in cu-file. - In elements - In elements - + The kernel which module defines the variable. + The variable name as defined in the cu-file. - + For dispose - + Dispose - + For IDisposable - - - Pointer to pinned host memory. - - - - - Width in elements - - - + - Height in elements + UIntPtr to managed memory. - + - Pitch in bytes + CUdeviceptr to managed memory. - + Size in bytes - + - Type size in bytes + Size in elements - + Access array per element. 
- X-index in elements - Y-index in elements + index in elements - - - Synchron copy host to 2D Array - - - - - - Synchron copy host to 2D Array - - - - - - Synchron copy 2D Array to host - - - - - - Synchron copy 2D Array to host - - - - - - Synchron copy host to device - - - - - - Synchron copy host to device - - - - - - Synchron copy device to host - - - - - - Synchron copy device to host - - - - - - Synchron Copy host to pitched device - - - - - - - Synchron Copy host to pitched device - - - - - - Synchron copy device to host - - - - - - - Synchron copy device to host - - - - - - Asynchron copy host to 2D Array - - - - - - - Asynchron copy host to 2D Array - - - - - - - Asynchron copy 2D Array to host - - - - - + - Asynchron copy 2D Array to host + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - - - + - Asynchron Copy host to device + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. - - + managed variable + newly allocated host variable with value from managed memory - + - Asynchron copy device to host + The on which a pointer was allocated or registered - - - + - Asynchron Copy host to device + The describing the physical location of a pointer - - - + - Asynchron copy device to host + The address at which a pointer's memory may be accessed on the device + Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - - - + - Asynchron Copy host to pitched device + The address at which a pointer's memory may be accessed on the host - - - - + - Asynchron Copy host to pitched device + A pair of tokens for use with the nv-p2p.h Linux kernel interface - - - + - Asynchron copy device to host + Synchronize every synchronous memory operation initiated on this region - - - - + - Asynchron copy device to host + A process-wide unique ID for an allocated memory region - - - + - Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag + Indicates if the pointer points to managed memory - Device Pointer - + - Passes back the flags that were specified when allocating the pinned host buffer + Attach memory to a stream asynchronously + + Enqueues an operation in hStream to specify stream association of + length bytes of memory starting from dptr. This function is a + stream-ordered operation, meaning that it is dependent on, and will + only take effect when, previous work in stream has completed. Any + previous association is automatically replaced. + + dptr must point to an address within managed memory space declared + using the __managed__ keyword or allocated with cuMemAllocManaged. + + length must be zero, to indicate that the entire allocation's + stream association is being changed. Currently, it's not possible + to change stream association for a portion of an allocation. + + The stream association is specified using flags which must be + one of . + If the flag is specified, the memory can be accessed + by any stream on any device. + If the flag is specified, the program makes a guarantee + that it won't access the memory on the device from any stream. + If the flag is specified, the program makes a guarantee + that it will only access the memory on the device from hStream. It is illegal + to attach singly to the NULL stream, because the NULL stream is a virtual global + stream and not a specific stream. An error will be returned in this case. 
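A sketch of the single-stream association pattern just described, under the same assumed ManagedCuda names (StreamAttachMemAsync, CUmemAttach_flags.Single/Global):

    using ManagedCuda;
    using ManagedCuda.BasicTypes;

    static class StreamAttachSketch
    {
        public static void Run(CudaContext ctx)
        {
            var stream = new CudaStream();
            var buf = new CudaManagedMemory_double(256, CUmemAttach_flags.Global);

            // Restrict device access to 'stream' only; length must be 0 (whole allocation).
            buf.StreamAttachMemAsync(stream.Stream, 0, CUmemAttach_flags.Single);

            // ... kernels launched into 'stream' may use buf.DevicePointer ...

            // Once all work in 'stream' has completed, the CPU may touch the region
            // even while other streams are still busy on the GPU.
            stream.Synchronize();
            double first = buf[0];
            System.Console.WriteLine(first);

            // Re-attach globally to make the allocation visible to every stream again.
            buf.StreamAttachMemAsync(stream.Stream, 0, CUmemAttach_flags.Global);

            buf.Dispose();
            stream.Dispose();
        }
    }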
+ + When memory is associated with a single stream, the Unified Memory system will + allow CPU access to this memory region so long as all operations in hStream + have completed, regardless of whether other streams are active. In effect, + this constrains exclusive ownership of the managed memory region by + an active GPU to per-stream activity instead of whole-GPU activity. + + Accessing memory on the device from streams that are not associated with + it will produce undefined results. No error checking is performed by the + Unified Memory system to ensure that kernels launched into other streams + do not access this region. + + It is a program's responsibility to order calls to + via events, synchronization or other means to ensure legal access to memory + at all times. Data visibility and coherency will be changed appropriately + for all kernels which follow a stream-association change. + + If hStream is destroyed while data is associated with it, the association is + removed and the association reverts to the default visibility of the allocation + as specified at cuMemAllocManaged. For __managed__ variables, the default + association is always . Note that destroying a stream is an + asynchronous operation, and as a result, the change to default association won't + happen until all work in the stream has completed. + + Stream in which to enqueue the attach operation + Length of memory (must be zero) + Must be one of - - - Enumerator class for CudaPageLockedHostMemory2D_uchar4 - - - - - - - - - + + Prefetches memory to the specified destination device + Prefetches memory to the specified destination device. devPtr is the + base device pointer of the memory to be prefetched and dstDevice is the + destination device. count specifies the number of bytes to copy. hStream + is the stream in which the operation is enqueued. - - - - + Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. - - - - + If no physical memory has been allocated for this region, then this memory region + will be populated and mapped on the destination device. If there's insufficient + memory to prefetch the desired region, the Unified Memory driver may evict pages + belonging to other memory regions to make room. If there's no memory that can be + evicted, then the Unified Memory driver will prefetch less than what was requested. + In the normal case, any mappings to the previous location of the migrated pages are + removed and mappings for the new location are only setup on the dstDevice. + The application can exercise finer control on these mappings using ::cudaMemAdvise. + Destination device to prefetch to + Stream to enqueue prefetch operation + Note that this function is asynchronous with respect to the host and all work on other devices. - + - - - + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. 
When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. 
+ - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. + + Note that this function is asynchronous with respect to the host and all work + on other devices. + + Pointer to memory to set the advice for + Size in bytes of the memory range + Advice to be applied for the specified memory range + Device to apply the advice for - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: sbyte + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. 
+ - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. + + Note that this function is asynchronous with respect to the host and all work + on other devices. + managed memory variable + Advice to be applied for the specified memory range + Device to apply the advice for - + - Creates a new CudaPageLockedHostMemory2D_sbyte and allocates the memory on host. Using cuMemHostAlloc + Enumerator class for CudaManagedMemory_cuDoubleReal - In elements - Width including alignment in bytes - In elements - - + - Creates a new CudaPageLockedHostMemory2D_sbyte and allocates the memory on host. Using cuMemHostAlloc without flags. + - In elements - Width including alignment in bytes - In elements + - + - Creates a new CudaPageLockedHostMemory2D_sbyte and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(sbyte). Using cuMemHostAlloc without flags. + - In elements - In elements - + - Creates a new CudaPageLockedHostMemory2D_sbyte and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(sbyte). Using cuMemHostAlloc. + - In elements - In elements - - + - For dispose + - + - Dispose + + - + - For IDisposable + A variable located in managed memory. + Type: cuFloatComplex - - + - Pointer to pinned host memory. + Creates a new CudaManagedMemory and allocates the memory on host/device. + In elements + - + - Width in elements + Creates a new CudaManagedMemory from definition in cu-file. + The module where the variable is defined in. + The variable name as defined in the cu-file. - + - Height in elements + Creates a new CudaManagedMemory from definition in cu-file. + The kernel which module defines the variable. + The variable name as defined in the cu-file. - + - Pitch in bytes + For dispose - + - Size in bytes + Dispose - + - Type size in bytes + For IDisposable + - + - Access array per element. + UIntPtr to managed memory. - X-index in elements - Y-index in elements - - + - Synchron copy host to 2D Array + CUdeviceptr to managed memory. 
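Putting the members listed here together, a minimal usage sketch; the kernel parameter is a placeholder, and the cuFloatComplex constructor and the implicit host conversion on the last lines are assumed to behave as the summaries above describe.

    using ManagedCuda;
    using ManagedCuda.BasicTypes;
    using ManagedCuda.VectorTypes;

    static class ManagedComplexSketch
    {
        // 'kernel' stands in for a CudaKernel loaded elsewhere (e.g. from a PTX module).
        public static void Run(CudaContext ctx, CudaKernel kernel)
        {
            var vec = new CudaManagedMemory_cuFloatComplex(512, CUmemAttach_flags.Global);

            for (int i = 0; i < 512; i++)
                vec[i] = new cuFloatComplex(i, 0.0f);   // host-side element access via the indexer

            // The device pointer is passed to a kernel like any other allocation;
            // managed memory needs no explicit host<->device copies.
            kernel.Run(vec.DevicePointer, 512);

            ctx.Synchronize();                          // make device writes visible to the host
            cuFloatComplex first = vec;                 // assumed conversion: first element as host value
            System.Console.WriteLine(first);

            vec.Dispose();
        }
    }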
- - + - Synchron copy host to 2D Array + Size in bytes - - + - Synchron copy 2D Array to host + Size in elements - - + - Synchron copy 2D Array to host + Access array per element. - + index in elements + - + - Synchron copy host to device + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - - + - Synchron copy host to device + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. - + managed variable + newly allocated host variable with value from managed memory - + - Synchron copy device to host + The on which a pointer was allocated or registered - - + - Synchron copy device to host + The describing the physical location of a pointer - - + - Synchron Copy host to pitched device + The address at which a pointer's memory may be accessed on the device + Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - - - + - Synchron Copy host to pitched device + The address at which a pointer's memory may be accessed on the host - - + - Synchron copy device to host + A pair of tokens for use with the nv-p2p.h Linux kernel interface - - - + - Synchron copy device to host + Synchronize every synchronous memory operation initiated on this region - - + - Asynchron copy host to 2D Array + A process-wide unique ID for an allocated memory region - - - + - Asynchron copy host to 2D Array + Indicates if the pointer points to managed memory - - - + - Asynchron copy 2D Array to host + Attach memory to a stream asynchronously + + Enqueues an operation in hStream to specify stream association of + length bytes of memory starting from dptr. This function is a + stream-ordered operation, meaning that it is dependent on, and will + only take effect when, previous work in stream has completed. Any + previous association is automatically replaced. + + dptr must point to an address within managed memory space declared + using the __managed__ keyword or allocated with cuMemAllocManaged. + + length must be zero, to indicate that the entire allocation's + stream association is being changed. Currently, it's not possible + to change stream association for a portion of an allocation. + + The stream association is specified using flags which must be + one of . + If the flag is specified, the memory can be accessed + by any stream on any device. + If the flag is specified, the program makes a guarantee + that it won't access the memory on the device from any stream. + If the flag is specified, the program makes a guarantee + that it will only access the memory on the device from hStream. It is illegal + to attach singly to the NULL stream, because the NULL stream is a virtual global + stream and not a specific stream. An error will be returned in this case. + + When memory is associated with a single stream, the Unified Memory system will + allow CPU access to this memory region so long as all operations in hStream + have completed, regardless of whether other streams are active. In effect, + this constrains exclusive ownership of the managed memory region by + an active GPU to per-stream activity instead of whole-GPU activity. + + Accessing memory on the device from streams that are not associated with + it will produce undefined results. No error checking is performed by the + Unified Memory system to ensure that kernels launched into other streams + do not access this region. 
+ + It is a program's responsibility to order calls to + via events, synchronization or other means to ensure legal access to memory + at all times. Data visibility and coherency will be changed appropriately + for all kernels which follow a stream-association change. + + If hStream is destroyed while data is associated with it, the association is + removed and the association reverts to the default visibility of the allocation + as specified at cuMemAllocManaged. For __managed__ variables, the default + association is always . Note that destroying a stream is an + asynchronous operation, and as a result, the change to default association won't + happen until all work in the stream has completed. + - - + Stream in which to enqueue the attach operation + Length of memory (must be zero) + Must be one of + - + - Asynchron copy 2D Array to host + Prefetches memory to the specified destination device + Prefetches memory to the specified destination device. devPtr is the + base device pointer of the memory to be prefetched and dstDevice is the + destination device. count specifies the number of bytes to copy. hStream + is the stream in which the operation is enqueued. + + Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. + + If no physical memory has been allocated for this region, then this memory region + will be populated and mapped on the destination device. If there's insufficient + memory to prefetch the desired region, the Unified Memory driver may evict pages + belonging to other memory regions to make room. If there's no memory that can be + evicted, then the Unified Memory driver will prefetch less than what was requested. + + In the normal case, any mappings to the previous location of the migrated pages are + removed and mappings for the new location are only setup on the dstDevice. + The application can exercise finer control on these mappings using ::cudaMemAdvise. - - + Destination device to prefetch to + Stream to enqueue prefetch operation + Note that this function is asynchronous with respect to the host and all work on other devices. - + - Asynchron Copy host to device + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. 
Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. + + Note that this function is asynchronous with respect to the host and all work + on other devices. - - + Pointer to memory to set the advice for + Size in bytes of the memory range + Advice to be applied for the specified memory range + Device to apply the advice for - + - Asynchron copy device to host + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. 
+ + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. 
But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. + + Note that this function is asynchronous with respect to the host and all work + on other devices. - - + managed memory variable + Advice to be applied for the specified memory range + Device to apply the advice for - + - Asynchron Copy host to device + Enumerator class for CudaManagedMemory_cuFloatComplex - - - + - Asynchron copy device to host + - - + - + - Asynchron Copy host to pitched device + - - - - + - Asynchron Copy host to pitched device + - - - + - Asynchron copy device to host + - - - - + - Asynchron copy device to host + - - + - + - Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag + A variable located in managed memory. + Type: cuFloatReal - Device Pointer - + - Passes back the flags that were specified when allocating the pinned host buffer + Creates a new CudaManagedMemory and allocates the memory on host/device. - + In elements + - + - Enumerator class for CudaPageLockedHostMemory2D_sbyte + Creates a new CudaManagedMemory from definition in cu-file. + The module where the variable is defined in. + The variable name as defined in the cu-file. - + - + Creates a new CudaManagedMemory from definition in cu-file. - + The kernel which module defines the variable. + The variable name as defined in the cu-file. - + - + For dispose - + - + Dispose - + - + For IDisposable + - + - + UIntPtr to managed memory. - - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: char1 + CUdeviceptr to managed memory. - + - Creates a new CudaPageLockedHostMemory2D_char1 and allocates the memory on host. Using cuMemHostAlloc + Size in bytes - In elements - Width including alignment in bytes - In elements - - + - Creates a new CudaPageLockedHostMemory2D_char1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Size in elements - In elements - Width including alignment in bytes - In elements - + - Creates a new CudaPageLockedHostMemory2D_char1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char1). Using cuMemHostAlloc without flags. + Access array per element. - In elements - In elements + index in elements + - + - Creates a new CudaPageLockedHostMemory2D_char1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char1). Using cuMemHostAlloc. + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - In elements - In elements - - + - For dispose + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. 
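To make the managed-memory entries above easier to follow, here is a minimal sketch in CUDA driver-API C of the workflow they describe: allocating with cuMemAllocManaged, touching the memory from the host (which is what the per-element indexer and the host-value conversion rely on), and attaching the allocation to a single stream. The ManagedCuda wrappers are assumed to map onto these calls; the CHECK macro, buffer size and flag choices are illustrative only, not part of the patch.

/* Hedged sketch (not from this patch): driver-API calls the CudaManagedMemory
 * wrappers documented above are described as binding to. */
#include <cuda.h>
#include <stdio.h>
#include <stdint.h>

#define CHECK(call) do { CUresult r_ = (call); if (r_ != CUDA_SUCCESS) { \
    fprintf(stderr, "CUDA error %d at %s:%d\n", (int)r_, __FILE__, __LINE__); \
    return 1; } } while (0)

int main(void)
{
    CUdevice dev; CUcontext ctx; CUstream stream; CUdeviceptr managed;
    size_t n = 1024, bytes = n * sizeof(float);

    CHECK(cuInit(0));
    CHECK(cuDeviceGet(&dev, 0));
    CHECK(cuCtxCreate(&ctx, 0, dev));
    CHECK(cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING));

    /* Allocate unified (managed) memory; CU_MEM_ATTACH_GLOBAL makes it
     * visible to every stream on every device, as described above. */
    CHECK(cuMemAllocManaged(&managed, bytes, CU_MEM_ATTACH_GLOBAL));

    /* The host can read and write managed memory directly, which is what
     * the wrapper's indexer and "convert to host value" helpers rely on. */
    float *host_view = (float *)(uintptr_t)managed;
    for (size_t i = 0; i < n; ++i) host_view[i] = (float)i;

    /* Restrict the allocation to one stream: the "attach memory to a stream
     * asynchronously" operation documented above (length must be zero). */
    CHECK(cuStreamAttachMemAsync(stream, managed, 0, CU_MEM_ATTACH_SINGLE));
    CHECK(cuStreamSynchronize(stream));

    CHECK(cuMemFree(managed));
    CHECK(cuStreamDestroy(stream));
    CHECK(cuCtxDestroy(ctx));
    return 0;
}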
+ managed variable + newly allocated host variable with value from managed memory - + - Dispose + The on which a pointer was allocated or registered - + - For IDisposable + The describing the physical location of a pointer - - + - Pointer to pinned host memory. + The address at which a pointer's memory may be accessed on the device + Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - + - Width in elements + The address at which a pointer's memory may be accessed on the host - + - Height in elements + A pair of tokens for use with the nv-p2p.h Linux kernel interface - + - Pitch in bytes + Synchronize every synchronous memory operation initiated on this region - + - Size in bytes + A process-wide unique ID for an allocated memory region - + - Type size in bytes + Indicates if the pointer points to managed memory - + - Access array per element. + Attach memory to a stream asynchronously + + Enqueues an operation in hStream to specify stream association of + length bytes of memory starting from dptr. This function is a + stream-ordered operation, meaning that it is dependent on, and will + only take effect when, previous work in stream has completed. Any + previous association is automatically replaced. + + dptr must point to an address within managed memory space declared + using the __managed__ keyword or allocated with cuMemAllocManaged. + + length must be zero, to indicate that the entire allocation's + stream association is being changed. Currently, it's not possible + to change stream association for a portion of an allocation. + + The stream association is specified using flags which must be + one of . + If the flag is specified, the memory can be accessed + by any stream on any device. + If the flag is specified, the program makes a guarantee + that it won't access the memory on the device from any stream. + If the flag is specified, the program makes a guarantee + that it will only access the memory on the device from hStream. It is illegal + to attach singly to the NULL stream, because the NULL stream is a virtual global + stream and not a specific stream. An error will be returned in this case. + + When memory is associated with a single stream, the Unified Memory system will + allow CPU access to this memory region so long as all operations in hStream + have completed, regardless of whether other streams are active. In effect, + this constrains exclusive ownership of the managed memory region by + an active GPU to per-stream activity instead of whole-GPU activity. + + Accessing memory on the device from streams that are not associated with + it will produce undefined results. No error checking is performed by the + Unified Memory system to ensure that kernels launched into other streams + do not access this region. + + It is a program's responsibility to order calls to + via events, synchronization or other means to ensure legal access to memory + at all times. Data visibility and coherency will be changed appropriately + for all kernels which follow a stream-association change. + + If hStream is destroyed while data is associated with it, the association is + removed and the association reverts to the default visibility of the allocation + as specified at cuMemAllocManaged. For __managed__ variables, the default + association is always . Note that destroying a stream is an + asynchronous operation, and as a result, the change to default association won't + happen until all work in the stream has completed. 
+ - X-index in elements - Y-index in elements + Stream in which to enqueue the attach operation + Length of memory (must be zero) + Must be one of - + - Synchron copy host to 2D Array + Prefetches memory to the specified destination device + Prefetches memory to the specified destination device. devPtr is the + base device pointer of the memory to be prefetched and dstDevice is the + destination device. count specifies the number of bytes to copy. hStream + is the stream in which the operation is enqueued. + + Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. + + If no physical memory has been allocated for this region, then this memory region + will be populated and mapped on the destination device. If there's insufficient + memory to prefetch the desired region, the Unified Memory driver may evict pages + belonging to other memory regions to make room. If there's no memory that can be + evicted, then the Unified Memory driver will prefetch less than what was requested. + + In the normal case, any mappings to the previous location of the migrated pages are + removed and mappings for the new location are only setup on the dstDevice. + The application can exercise finer control on these mappings using ::cudaMemAdvise. - + Destination device to prefetch to + Stream to enqueue prefetch operation + Note that this function is asynchronous with respect to the host and all work on other devices. - + - Synchron copy host to 2D Array + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. 
Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. + + Note that this function is asynchronous with respect to the host and all work + on other devices. - + Pointer to memory to set the advice for + Size in bytes of the memory range + Advice to be applied for the specified memory range + Device to apply the advice for - + - Synchron copy 2D Array to host + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. 
+ - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. 
+ + Note that this function is asynchronous with respect to the host and all work + on other devices. - + managed memory variable + Advice to be applied for the specified memory range + Device to apply the advice for - + - Synchron copy 2D Array to host + Enumerator class for CudaManagedMemory_cuFloatReal - - + - Synchron copy host to device + - + - + - Synchron copy host to device + - - + - Synchron copy device to host + - - + - Synchron copy device to host + - - + - Synchron Copy host to pitched device + - - + - + - Synchron Copy host to pitched device + A variable located in managed memory. + Type: dim3 - - + - Synchron copy device to host + Creates a new CudaManagedMemory and allocates the memory on host/device. - - + In elements + - + - Synchron copy device to host + Creates a new CudaManagedMemory from definition in cu-file. - + The module where the variable is defined in. + The variable name as defined in the cu-file. - + - Asynchron copy host to 2D Array + Creates a new CudaManagedMemory from definition in cu-file. - - + The kernel which module defines the variable. + The variable name as defined in the cu-file. - + - Asynchron copy host to 2D Array + For dispose - - - + - Asynchron copy 2D Array to host + Dispose - - - + - Asynchron copy 2D Array to host + For IDisposable - - + - + - Asynchron Copy host to device + UIntPtr to managed memory. - - - + - Asynchron copy device to host + CUdeviceptr to managed memory. - - - + - Asynchron Copy host to device + Size in bytes - - - + - Asynchron copy device to host + Size in elements - - - + - Asynchron Copy host to pitched device + Access array per element. - - - + index in elements + - + - Asynchron Copy host to pitched device + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - - - + - Asynchron copy device to host + Converts a managed variable to a host value. In case of multiple managed values (array), only the first value is converted. - - - + managed variable + newly allocated host variable with value from managed memory - + - Asynchron copy device to host + The on which a pointer was allocated or registered - - - + - Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag + The describing the physical location of a pointer - Device Pointer - + - Passes back the flags that were specified when allocating the pinned host buffer + The address at which a pointer's memory may be accessed on the device + Except in the exceptional disjoint addressing cases, the value returned will equal the input value. - - + - Enumerator class for CudaPageLockedHostMemory2D_char1 + The address at which a pointer's memory may be accessed on the host - + - + A pair of tokens for use with the nv-p2p.h Linux kernel interface - - + - + Synchronize every synchronous memory operation initiated on this region - + - + A process-wide unique ID for an allocated memory region - + - + Indicates if the pointer points to managed memory - + - + Attach memory to a stream asynchronously + + Enqueues an operation in hStream to specify stream association of + length bytes of memory starting from dptr. This function is a + stream-ordered operation, meaning that it is dependent on, and will + only take effect when, previous work in stream has completed. Any + previous association is automatically replaced. + + dptr must point to an address within managed memory space declared + using the __managed__ keyword or allocated with cuMemAllocManaged. 
+ + length must be zero, to indicate that the entire allocation's + stream association is being changed. Currently, it's not possible + to change stream association for a portion of an allocation. + + The stream association is specified using flags which must be + one of . + If the flag is specified, the memory can be accessed + by any stream on any device. + If the flag is specified, the program makes a guarantee + that it won't access the memory on the device from any stream. + If the flag is specified, the program makes a guarantee + that it will only access the memory on the device from hStream. It is illegal + to attach singly to the NULL stream, because the NULL stream is a virtual global + stream and not a specific stream. An error will be returned in this case. + + When memory is associated with a single stream, the Unified Memory system will + allow CPU access to this memory region so long as all operations in hStream + have completed, regardless of whether other streams are active. In effect, + this constrains exclusive ownership of the managed memory region by + an active GPU to per-stream activity instead of whole-GPU activity. + + Accessing memory on the device from streams that are not associated with + it will produce undefined results. No error checking is performed by the + Unified Memory system to ensure that kernels launched into other streams + do not access this region. + + It is a program's responsibility to order calls to + via events, synchronization or other means to ensure legal access to memory + at all times. Data visibility and coherency will be changed appropriately + for all kernels which follow a stream-association change. + + If hStream is destroyed while data is associated with it, the association is + removed and the association reverts to the default visibility of the allocation + as specified at cuMemAllocManaged. For __managed__ variables, the default + association is always . Note that destroying a stream is an + asynchronous operation, and as a result, the change to default association won't + happen until all work in the stream has completed. + + Stream in which to enqueue the attach operation + Length of memory (must be zero) + Must be one of - - - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: char2 - - - + - Creates a new CudaPageLockedHostMemory2D_char2 and allocates the memory on host. Using cuMemHostAlloc + Prefetches memory to the specified destination device + Prefetches memory to the specified destination device. devPtr is the + base device pointer of the memory to be prefetched and dstDevice is the + destination device. count specifies the number of bytes to copy. hStream + is the stream in which the operation is enqueued. + + Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. + + If no physical memory has been allocated for this region, then this memory region + will be populated and mapped on the destination device. If there's insufficient + memory to prefetch the desired region, the Unified Memory driver may evict pages + belonging to other memory regions to make room. If there's no memory that can be + evicted, then the Unified Memory driver will prefetch less than what was requested. + + In the normal case, any mappings to the previous location of the migrated pages are + removed and mappings for the new location are only setup on the dstDevice. + The application can exercise finer control on these mappings using ::cudaMemAdvise. 
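As a companion to the prefetch and advise entries above, a short hedged sketch of the underlying driver calls (cuMemAdvise and cuMemPrefetchAsync). The helper name warm_up_read_mostly and the particular advice combination are illustrative; dev, stream and the managed pointer are assumed to come from a setup like the earlier sketch.

/* Hedged sketch: mark a managed range read-mostly, pin its preferred
 * location to one GPU, then prefetch it so kernels avoid demand faults. */
#include <cuda.h>

CUresult warm_up_read_mostly(CUdeviceptr managed, size_t bytes,
                             CUdevice dev, CUstream stream)
{
    CUresult r;

    /* Read-mostly: each processor may keep its own read-only copy
     * (the device argument is ignored for this advice). */
    r = cuMemAdvise(managed, bytes, CU_MEM_ADVISE_SET_READ_MOSTLY, dev);
    if (r != CUDA_SUCCESS) return r;

    /* Prefer to keep the pages resident on this GPU when faults occur. */
    r = cuMemAdvise(managed, bytes, CU_MEM_ADVISE_SET_PREFERRED_LOCATION, dev);
    if (r != CUDA_SUCCESS) return r;

    /* Migrate the data up front instead of paying per-fault migration;
     * passing CU_DEVICE_CPU as the destination would prefetch to the host. */
    return cuMemPrefetchAsync(managed, bytes, dev, stream);
}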
- In elements - Width including alignment in bytes - In elements - + Destination device to prefetch to + Stream to enqueue prefetch operation + Note that this function is asynchronous with respect to the host and all work on other devices. - + - Creates a new CudaPageLockedHostMemory2D_char2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. 
If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. + + Note that this function is asynchronous with respect to the host and all work + on other devices. - In elements - Width including alignment in bytes - In elements + Pointer to memory to set the advice for + Size in bytes of the memory range + Advice to be applied for the specified memory range + Device to apply the advice for - + - Creates a new CudaPageLockedHostMemory2D_char2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char2). Using cuMemHostAlloc without flags. + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. 
It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. + + Note that this function is asynchronous with respect to the host and all work + on other devices. - In elements - In elements + managed memory variable + Advice to be applied for the specified memory range + Device to apply the advice for - + - Creates a new CudaPageLockedHostMemory2D_char2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char2). Using cuMemHostAlloc. + Enumerator class for CudaManagedMemory_dim3 - In elements - In elements - - + - For dispose + + - + - Dispose + - + - For IDisposable + - - + - Pointer to pinned host memory. + - + - Width in elements + + - + - Height in elements + CudaMemoryPool - + - Pitch in bytes + Creates a new CudaMemoryPool. + - + - Size in bytes + imports a memory pool from a shared handle. + Specific allocations can be imported from the imported pool with cuMemPoolImportPointer. 
+ note Imported memory pools do not support creating new allocations. As such imported memory pools + may not be used in cuDeviceSetMemPool or ::cuMemAllocFromPoolAsync calls. + OS handle of the pool to open + The type of handle being imported + must be 0 - + - Type size in bytes + Gets the current or default memory pool of the CUdevice. + The device to the memory pool from + Get the default or the current memory pool - + - Access array per element. + For dispose - X-index in elements - Y-index in elements - - + - Synchron copy host to 2D Array + Dispose - - + - Synchron copy host to 2D Array + For IDisposable - + - + - Synchron copy 2D Array to host + Tries to release memory back to the OS + Releases memory back to the OS until the pool contains fewer than minBytesToKeep + reserved bytes, or there is no more memory that the allocator can safely release. + The allocator cannot release OS allocations that back outstanding asynchronous allocations. + The OS allocations may happen at different granularity from the user allocations. + + note: Allocations that have not been freed count as outstanding. + note: Allocations that have been asynchronously freed but whose completion has + not been observed on the host (eg.by a synchronize) can count as outstanding. - + If the pool has less than minBytesToKeep reserved, + the TrimTo operation is a no-op.Otherwise the pool will be guaranteed to have at least minBytesToKeep bytes reserved after the operation. - + - Synchron copy 2D Array to host + Import a memory pool allocation from another process. + Returns in \p ptr_out a pointer to the imported memory. + The imported memory must not be accessed before the allocation operation completes + in the exporting process.The imported memory must be freed from all importing processes before + being freed in the exporting process.The pointer may be freed with cuMemFree + or cuMemFreeAsync.If cuMemFreeAsync is used, the free must be completed + on the importing process before the free operation on the exporting process. + note The cuMemFreeAsync api may be used in the exporting process before + the cuMemFreeAsync operation completes in its stream as long as the + cuMemFreeAsync in the exporting process specifies a stream with + a stream dependency on the importing process's cuMemFreeAsync. - - - + + + + + + + Allocates memory from a specified pool with stream ordered semantics. + Inserts an allocation operation into \p hStream. + A pointer to the allocated memory is returned immediately in *dptr. + The allocation must not be accessed until the the allocation operation completes. + The allocation comes from the specified memory pool. + note + - The specified memory pool may be from a device different than that of the specified \p hStream. + - Basic stream ordering allows future work submitted into the same stream to use the allocation. + Stream query, stream synchronize, and CUDA events can be used to guarantee that the allocation + operation completes before work submitted in a separate stream runs. + + Number of bytes to allocate + The stream establishing the stream ordering semantic + + + + Returns the accessibility of a pool from a device + Returns the accessibility of the pool's memory from the specified location. + + the location accessing the pool + + + + Controls visibility of pools between devices + + + + + Exports a memory pool to the requested handle type. + Given an IPC capable mempool, create an OS handle to share the pool with another process. 
+ A recipient process can convert the shareable handle into a mempool with::cuMemPoolImportFromShareableHandle. + Individual pointers can then be shared with the ::cuMemPoolExportPointer and ::cuMemPoolImportPointer APIs. + The implementation of what the shareable handle is and how it can be transferred is defined by the requested + handle type. + note: To create an IPC capable mempool, create a mempool with a CUmemAllocationHandleType other than CU_MEM_HANDLE_TYPE_NONE. + + the type of handle to create + must be 0 + + + + Sets attributes of a memory pool + Supported attributes are: + - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t) + Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS.When more than the release + threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the next + call to stream, event or context synchronize. (default 0) + - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to use memory asynchronously freed + in another stream as long as a stream ordering dependency + of the allocating stream on the free action exists. + Cuda events and null stream interactions can create the required + stream ordered dependencies. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int) + Allow reuse of already completed frees when there is no dependency + between the free and allocation. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to insert new stream dependencies + in order to establish the stream ordering required to reuse + a piece of memory released by::cuMemFreeAsync(default enabled). + + The attribute to modify + Pointer to the value to assign + + + + Sets attributes of a memory pool + Supported attributes are: + - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t) + Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS.When more than the release + threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the next + call to stream, event or context synchronize. (default 0) + - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to use memory asynchronously freed + in another stream as long as a stream ordering dependency + of the allocating stream on the free action exists. + Cuda events and null stream interactions can create the required + stream ordered dependencies. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int) + Allow reuse of already completed frees when there is no dependency + between the free and allocation. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to insert new stream dependencies + in order to establish the stream ordering required to reuse + a piece of memory released by::cuMemFreeAsync(default enabled). 
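The memory-pool attribute entries above read more easily next to the driver calls they wrap. A hedged sketch follows, using the device's default pool; the helper name pool_alloc_demo, the 64 MiB threshold and the 1 MiB allocation are arbitrary illustrations, and the wrapper members are assumed to correspond to cuMemPoolSetAttribute, cuMemAllocFromPoolAsync, cuMemFreeAsync and cuMemPoolTrimTo.

/* Hedged sketch: stream-ordered allocation from a memory pool. Assumes an
 * initialized context plus `dev` and `stream` as in the earlier sketches. */
#include <cuda.h>

CUresult pool_alloc_demo(CUdevice dev, CUstream stream)
{
    CUmemoryPool pool;
    CUdeviceptr buf;
    cuuint64_t keep = 64ull << 20;   /* keep up to 64 MiB cached in the pool */
    CUresult r;

    /* Use the device's default memory pool rather than creating a new one. */
    r = cuDeviceGetDefaultMemPool(&pool, dev);
    if (r != CUDA_SUCCESS) return r;

    /* Raise the release threshold so freed memory is retained for reuse
     * instead of being returned to the OS at every synchronization point. */
    r = cuMemPoolSetAttribute(pool, CU_MEMPOOL_ATTR_RELEASE_THRESHOLD, &keep);
    if (r != CUDA_SUCCESS) return r;

    /* Stream-ordered allocation and free from that pool. */
    r = cuMemAllocFromPoolAsync(&buf, 1 << 20, pool, stream);
    if (r != CUDA_SUCCESS) return r;
    /* ... enqueue work that uses buf in `stream` ... */
    r = cuMemFreeAsync(buf, stream);
    if (r != CUDA_SUCCESS) return r;

    /* Optionally trim the pool back down to a minimum reserved size. */
    return cuMemPoolTrimTo(pool, 0);
}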
+ + The attribute to modify + Pointer to the value to assign + + + + Sets attributes of a memory pool + Supported attributes are: + - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t) + Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS.When more than the release + threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the next + call to stream, event or context synchronize. (default 0) + - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to use memory asynchronously freed + in another stream as long as a stream ordering dependency + of the allocating stream on the free action exists. + Cuda events and null stream interactions can create the required + stream ordered dependencies. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int) + Allow reuse of already completed frees when there is no dependency + between the free and allocation. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to insert new stream dependencies + in order to establish the stream ordering required to reuse + a piece of memory released by::cuMemFreeAsync(default enabled). + + The attribute to modify + Pointer to the value to assign + + + + Sets attributes of a memory pool + Supported attributes are: + - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t) + Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS.When more than the release + threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the next + call to stream, event or context synchronize. (default 0) + - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to use memory asynchronously freed + in another stream as long as a stream ordering dependency + of the allocating stream on the free action exists. + Cuda events and null stream interactions can create the required + stream ordered dependencies. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int) + Allow reuse of already completed frees when there is no dependency + between the free and allocation. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to insert new stream dependencies + in order to establish the stream ordering required to reuse + a piece of memory released by::cuMemFreeAsync(default enabled). + + The attribute to modify + Pointer to the value to assign + + - Synchron copy host to device + Returns the wrapped CUarray - - + - Synchron copy host to device + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - - + - Synchron copy device to host + Number of channels in array - - + - Synchron copy device to host + One channel, e.g. float1, int1, float, int - - + - Synchron Copy host to pitched device + Two channels, e.g. float2, int2 - - - + - Synchron Copy host to pitched device + Four channels, e.g. float4, int4 - - + - Synchron copy device to host + A mipmapped Cuda array - - - + - Synchron copy device to host + Creates a CUDA mipmapped array according to descriptor. 
+ Width, Height, and Depth are the width, height, and depth of the CUDA array (in elements); the following + types of CUDA arrays can be allocated: + – A 1D mipmapped array is allocated if Height and Depth extents are both zero. + – A 2D mipmapped array is allocated if only Depth extent is zero. + – A 3D mipmapped array is allocated if all three extents are non-zero. + – A 1D layered CUDA mipmapped array is allocated if only Height is zero and the + flag is set. Each layer is a 1D array. The number of layers is determined by the depth extent. + – A 2D layered CUDA mipmapped array is allocated if all three extents are non-zero and the + flag is set. Each layer is a 2D array. The number of layers is determined by the depth extent. + – A cubemap CUDA mipmapped array is allocated if all three extents are non-zero and the + flag is set. Width must be equal to Height, and Depth must be six. A + cubemap is a special type of 2D layered CUDA array, where the six layers represent the six faces of a + cube. The order of the six layers in memory is the same as that listed in CUarray_cubemap_face. + – A cubemap layered CUDA mipmapped array is allocated if all three extents are non-zero, and both, + and flags are set. Width must be equal + to Height, and Depth must be a multiple of six. A cubemap layered CUDA array is a special type of + 2D layered CUDA array that consists of a collection of cubemaps. The first six layers represent the first + cubemap, the next six layers form the second cubemap, and so on. + Flags may be set to: + – to enable creation of layered CUDA mipmapped arrays. If this flag is set, + Depth specifies the number of layers, not the depth of a 3D array. + – to enable creation of mipmapped cubemaps. If this flag is set, Width + must be equal to Height, and Depth must be six. If the CUDA_ARRAY3D_LAYERED flag is also set, + then Depth must be a multiple of six. + – to indicate that the CUDA mipmapped array will be used for + texture gather. Texture gather can only be performed on 2D CUDA mipmapped arrays. - + mipmapped array descriptor + Number of mipmap levels. This value is clamped to the range [1, 1 + floor(log2(max(width, height, depth)))] - + - Asynchron copy host to 2D Array + Creates a CUDA mipmapped array according to descriptor. + Width, Height, and Depth are the width, height, and depth of the CUDA array (in elements); the following + types of CUDA arrays can be allocated: + – A 1D mipmapped array is allocated if Height and Depth extents are both zero. + – A 2D mipmapped array is allocated if only Depth extent is zero. + – A 3D mipmapped array is allocated if all three extents are non-zero. + – A 1D layered CUDA mipmapped array is allocated if only Height is zero and the + flag is set. Each layer is a 1D array. The number of layers is determined by the depth extent. + – A 2D layered CUDA mipmapped array is allocated if all three extents are non-zero and the + flag is set. Each layer is a 2D array. The number of layers is determined by the depth extent. + – A cubemap CUDA mipmapped array is allocated if all three extents are non-zero and the + flag is set. Width must be equal to Height, and Depth must be six. A + cubemap is a special type of 2D layered CUDA array, where the six layers represent the six faces of a + cube. The order of the six layers in memory is the same as that listed in CUarray_cubemap_face. + – A cubemap layered CUDA mipmapped array is allocated if all three extents are non-zero, and both, + and flags are set. 
Width must be equal + to Height, and Depth must be a multiple of six. A cubemap layered CUDA array is a special type of + 2D layered CUDA array that consists of a collection of cubemaps. The first six layers represent the first + cubemap, the next six layers form the second cubemap, and so on. - - + Array format + Array width. See general description. + Array height. See general description. + Array depth or layer count. See general description. + number of channels + Flags may be set to: + – to enable creation of layered CUDA mipmapped arrays. If this flag is set, + Depth specifies the number of layers, not the depth of a 3D array. + – to enable creation of mipmapped cubemaps. If this flag is set, Width + must be equal to Height, and Depth must be six. If the CUDA_ARRAY3D_LAYERED flag is also set, + then Depth must be a multiple of six. + – to indicate that the CUDA mipmapped array will be used for + texture gather. Texture gather can only be performed on 2D CUDA mipmapped arrays. + Number of mipmap levels. This value is clamped to the range [1, 1 + floor(log2(max(width, height, depth)))] - + - Asynchron copy host to 2D Array + Creates a CUDA mipmapped array from an existing mipmap array handle. - - + handle to wrap + Array format of the wrapped array. Cannot be gathered through CUDA API. + Number of channels of wrapped array. - + - Asynchron copy 2D Array to host + Dispose - - - + - Asynchron copy 2D Array to host + For IDisposable - - + - + - Asynchron Copy host to device + Returns a CUDA array that represents a single mipmap level + of the CUDA mipmapped array. - - + Mipmap level - + - Asynchron copy device to host + Returns a CUDA array that represents a single mipmap level + of the CUDA mipmapped array. - - + Mipmap level - + - Asynchron Copy host to device + Returns a CUDA array that represents a single mipmap level + of the CUDA mipmapped array. - - + Mipmap level - + - Asynchron copy device to host + Returns a CUDA array that represents a single mipmap level + of the CUDA mipmapped array. - - + Mipmap level - + - Asynchron Copy host to pitched device + Returns the layout properties of a sparse CUDA mipmapped array + Returns the sparse array layout properties in \p sparseProperties + If the CUDA mipmapped array is not allocated with flag ::CUDA_ARRAY3D_SPARSE + ::CUDA_ERROR_INVALID_VALUE will be returned. + For non-layered CUDA mipmapped arrays, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize returns the + size of the mip tail region.The mip tail region includes all mip levels whose width, height or depth + is less than that of the tile. + For layered CUDA mipmapped arrays, if ::CUDA_ARRAY_SPARSE_PROPERTIES::flags contains ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL, + then ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies the size of the mip tail of all layers combined. + Otherwise, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies mip tail size per layer. + The returned value of::CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel is valid only if ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize is non-zero. - - - - + - Asynchron Copy host to pitched device + Returns the wrapped CUmipmappedArray - - - + - Asynchron copy device to host + Returns the wrapped CUDAArray3DDescriptor - - - - + - Asynchron copy device to host + Returns the Depth of the array - - - + - Returns the CUdeviceptr for pinned host memory mapped to device memory space. 
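As a worked example of the allocation rules above, the sketch below creates a plain 2D mipmapped array (Depth == 0, no layered or cubemap flags) with a full mip chain and then pulls out level 0. The constructor argument order follows the parameter list documented above; the enum and accessor names (CUArrayFormat.Float, the channel-count enum, GetLevel) are illustrative assumptions.

// Minimal sketch; enum and member names are assumptions (see note above).
int width = 1024, height = 1024;

// Full mip chain, clamped to [1, 1 + floor(log2(max(width, height, depth)))].
int numLevels = 1 + (int)Math.Floor(Math.Log(Math.Max(width, height), 2.0));

var mipmapped = new CudaMipmappedArray(
    CUArrayFormat.Float,                // array format
    width, height, 0,                   // width, height, depth in elements; depth == 0 -> 2D
    CudaMipmappedArrayNumChannels.One,  // one channel per element (assumed enum name)
    CUDAArray3DFlags.None,              // no layered / cubemap / texture-gather flags (assumed enum name)
    numLevels);

// Each mipmap level is exposed as an ordinary CUDA array (accessor name assumed).
var level0 = mipmapped.GetLevel(0);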
Only valid if context is created with flag + Returns the Height of the array - Device Pointer - + - Passes back the flags that were specified when allocating the pinned host buffer + Returns the array width in elements - - + - Enumerator class for CudaPageLockedHostMemory2D_char2 + Returns the array creation flags - + - + Returns the array format - - + - + Returns number of channels - + - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - + - + Cuda occupancy from CudaOccupancy.h - + - + mirror the type and spelling of cudaDeviceProp's members keep these alphabetized - - - - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: char3 - + + - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - Creates a new CudaPageLockedHostMemory2D_char3 and allocates the memory on host. Using cuMemHostAlloc + define our own cudaOccFuncAttributes to stay consistent with the original header file - In elements - Width including alignment in bytes - In elements - - + + + + + + + + + + + + + + + + + + + - Creates a new CudaPageLockedHostMemory2D_char3 and allocates the memory on host. Using cuMemHostAlloc without flags. + - In elements - Width including alignment in bytes - In elements - + - Creates a new CudaPageLockedHostMemory2D_char3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char3). Using cuMemHostAlloc without flags. + cudaOccFuncAttributes - In elements - In elements + + + Only the static part shared memory (without dynamic allocations) + + + - + - Creates a new CudaPageLockedHostMemory2D_char3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char3). Using cuMemHostAlloc. + - In elements - In elements - + - + - For dispose + Occupancy Error types - + + + + - Dispose + input parameter is invalid - + - For IDisposable + requested device is not supported in current implementation or device is invalid - - + - Pointer to pinned host memory. + Function cache configurations - + - Width in elements + no preference for shared memory or L1 (default) - + - Height in elements + prefer larger shared memory and smaller L1 cache - + - Pitch in bytes + prefer larger L1 cache and smaller shared memory - + - Size in bytes + prefer equal sized L1 cache and shared memory - + - Type size in bytes + Occupancy Limiting Factors - + - Access array per element. 
+ occupancy limited due to warps available - X-index in elements - Y-index in elements - - + - Synchron copy host to 2D Array + occupancy limited due to registers available - - + - Synchron copy host to 2D Array + occupancy limited due to shared memory available - - + - Synchron copy 2D Array to host + occupancy limited due to blocks available - - + - Synchron copy 2D Array to host + Partitioned global caching support - - + - Synchron copy host to device + Partitioned global caching is not supported - - + - Synchron copy host to device + Partitioned global caching is supported - - + - Synchron copy device to host + Partitioned global caching option - - + - Synchron copy device to host + Disable partitioned global caching - - + - Synchron Copy host to pitched device + Prefer partitioned global caching - - - + - Synchron Copy host to pitched device + Force partitioned global caching - - + - Synchron copy device to host + Per function opt in maximum dynamic shared memory limit - - - + - Synchron copy device to host + Default shmem limit - - + - Asynchron copy host to 2D Array + Use the optin shmem limit - - - + - Asynchron copy host to 2D Array + Shared memory carveout configurations - - - + - Asynchron copy 2D Array to host + no preference for shared memory or L1 (default) - - - + - Asynchron copy 2D Array to host + prefer maximum available shared memory, minimum L1 cache - - - + - Asynchron Copy host to device + prefer maximum available L1 cache, minimum shared memory - - - + - Asynchron copy device to host + prefer half of maximum available shared memory, with the rest as L1 cache - - - + - Asynchron Copy host to device + - - - + - Asynchron copy device to host + Active Thread Blocks per Multiprocessor - - - + + + + + + + + + + + + + + + + + + + + + + + + + - Asynchron Copy host to pitched device + define cudaOccDeviceState to include any device property needed to be passed + in future GPUs so that user interfaces don't change ; hence users are encouraged + to declare the struct zero in order to handle the assignments of any field + that might be added for later GPUs. - - - - + + + + + + + + *! + + + Align up shared memory based on compute major configurations + + + Shared memory based on the new carveoutConfig API introduced with Volta + + + Shared memory based on config requested by User + + + Return the per block shared memory limit based on function config + + + Partitioned global caching mode support + + - Asynchron Copy host to pitched device + Determine the maximum number of CTAs that can be run simultaneously per SM. + This is equivalent to the calculation done in the CUDA Occupancy Calculator + spreadsheet - - + + + + + + + - + - Asynchron copy device to host + The CUDA dynamic shared memory calculator computes the maximum size of + per-block dynamic shared memory if we want to place numBlocks blocks + on an SM. + Returns maximum size of dynamic shared memory to allow numBlocks blocks per SM. - - - + + + + + + - + - Asynchron copy device to host + - - + + + + + + + - + - Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag + - Device Pointer + + + + + + - + - Passes back the flags that were specified when allocating the pinned host buffer + A function to convert from block size to dynamic shared memory size. 
+ e.g.: + If no dynamic shared memory is used: x => 0 + If 4 bytes shared memory per thread is used: x = 4 * x - + block size + size of dynamic shared memory - + - Enumerator class for CudaPageLockedHostMemory2D_char3 + A CudaOccupancy exception is thrown if a CudaOccupancy API method call does not return 0 - + - - + + + - + + + + + + + + - + + + - + + + + + + + + + - + + + + + + + + + + Checks if value is zero. If value is zero, CudaOccupancyException is thrown. + + + + + + + + + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: char4 + Type: byte - + - Creates a new CudaPageLockedHostMemory2D_char4 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_byte and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_char4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_byte and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_char4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char4). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_byte and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(byte). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_char4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char4). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_byte and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(byte). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -30739,137 +30701,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -30877,14 +30839,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -30892,144 +30854,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. 
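Looking back at the occupancy calculator documented above, the block-size-to-dynamic-shared-memory converter is just a function from block size to bytes; the two cases given in its summary translate directly into C# lambdas. The exact delegate type and the calculator call shape are assumptions sketched from the descriptions above.

// The documented converter examples as lambdas (delegate shape is an assumption):
Func<int, int> noDynamicSmem      = blockSize => 0;              // kernel uses no dynamic shared memory
Func<int, int> fourBytesPerThread = blockSize => 4 * blockSize;  // 4 bytes of dynamic shared memory per thread

// A calculator ported from CudaOccupancy.h would combine such a converter with the
// mirrored device properties, function attributes and device state to report the
// active thread blocks per multiprocessor and the limiting factor (warps, registers,
// shared memory or blocks). Hypothetical call shape:
//   var result = CudaOccupancy.cudaOccMaxActiveBlocksPerMultiprocessor(
//       deviceProps, funcAttrs, deviceState, blockSize, fourBytesPerThread(blockSize));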
Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_char4 + Enumerator class for CudaPageLockedHostMemory2D_byte - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: short + Type: uchar1 - + - Creates a new CudaPageLockedHostMemory2D_short and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_uchar1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_short and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uchar1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_short and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(short). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uchar1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uchar1). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_short and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(short). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_uchar1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uchar1). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -31037,137 +30999,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -31175,14 +31137,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -31190,144 +31152,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_short + Enumerator class for CudaPageLockedHostMemory2D_uchar1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. 
- Type: short1 + Type: uchar2 - + - Creates a new CudaPageLockedHostMemory2D_short1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_uchar2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_short1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uchar2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_short1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(short1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uchar2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uchar2). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_short1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(short1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_uchar2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uchar2). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -31335,137 +31297,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -31473,14 +31435,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -31488,144 +31450,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_short1 + Enumerator class for CudaPageLockedHostMemory2D_uchar2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: short2 + Type: uchar3 - + - Creates a new CudaPageLockedHostMemory2D_short2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_uchar3 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_short2 and allocates the memory on host. Using cuMemHostAlloc without flags. 
+ Creates a new CudaPageLockedHostMemory2D_uchar3 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_short2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(short2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uchar3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uchar3). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_short2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(short2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_uchar3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uchar3). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -31633,137 +31595,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -31771,14 +31733,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -31786,144 +31748,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_short2 + Enumerator class for CudaPageLockedHostMemory2D_uchar3 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: short3 + Type: uchar4 - + - Creates a new CudaPageLockedHostMemory2D_short3 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_uchar4 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_short3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uchar4 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_short3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(short3). Using cuMemHostAlloc without flags. 
+ Creates a new CudaPageLockedHostMemory2D_uchar4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uchar4). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_short3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(short3). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_uchar4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uchar4). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -31931,137 +31893,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -32069,14 +32031,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -32084,144 +32046,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_short3 + Enumerator class for CudaPageLockedHostMemory2D_uchar4 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: short4 + Type: sbyte - + - Creates a new CudaPageLockedHostMemory2D_short4 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_sbyte and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_short4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_sbyte and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_short4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(short4). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_sbyte and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(sbyte). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_short4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(short4). Using cuMemHostAlloc. 
+ Creates a new CudaPageLockedHostMemory2D_sbyte and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(sbyte). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -32229,137 +32191,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -32367,14 +32329,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -32382,144 +32344,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_short4 + Enumerator class for CudaPageLockedHostMemory2D_sbyte - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: ushort + Type: char1 - + - Creates a new CudaPageLockedHostMemory2D_ushort and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_char1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_char1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ushort). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_char1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(char1). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ushort). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_char1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(char1). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
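Pulling the pieces of these CudaPageLockedHostMemory2D_* summaries together, a typical blocking round trip looks roughly like the sketch below. The device-side counterpart and the copy-method name (CudaPitchedDeviceVariable, SynchronCopyToDevice) are assumptions; the constructor and indexer follow the parameter lists documented above.

// Minimal sketch; device-side type and copy-method names are assumptions (see note above).
int width = 640, height = 480;          // both in elements

// Pitch defaults to width * sizeof(byte); cuMemHostAlloc is used without flags.
using (var host = new CudaPageLockedHostMemory2D_byte(width, height))
using (var device = new CudaPitchedDeviceVariable<byte>(width, height))
{
    // Element access through the 2D indexer ("Access array per element").
    host[10, 20] = 255;

    // Blocking copy to the pitched device allocation
    // ("Synchron Copy host to pitched device"; method name assumed).
    host.SynchronCopyToDevice(device);
}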
@@ -32527,137 +32489,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -32665,14 +32627,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -32680,144 +32642,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_ushort + Enumerator class for CudaPageLockedHostMemory2D_char1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: ushort1 + Type: char2 - + - Creates a new CudaPageLockedHostMemory2D_ushort1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_char2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_char2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ushort1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_char2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(char2). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ushort1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_char2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(char2). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -32825,137 +32787,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -32963,14 +32925,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -32978,144 +32940,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_ushort1 + Enumerator class for CudaPageLockedHostMemory2D_char2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: ushort2 + Type: char3 - + - Creates a new CudaPageLockedHostMemory2D_ushort2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_char3 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_char3 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ushort2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_char3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(char3). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ushort2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_char3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(char3). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -33123,137 +33085,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -33261,14 +33223,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -33276,144 +33238,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_ushort2 + Enumerator class for CudaPageLockedHostMemory2D_char3 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: ushort3 + Type: char4 - + - Creates a new CudaPageLockedHostMemory2D_ushort3 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_char4 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_char4 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ushort3). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_char4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(char4). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ushort3). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_char4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(char4). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
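The asynchronous variants documented in these classes only pay off when the copies are queued on a stream and the CPU synchronizes once at the end; the page-locked allocation is what allows the transfers to overlap with kernel work. A rough sketch of that pattern, with all method and property names treated as assumptions:

// Rough sketch; copy-method names and CudaStream usage are assumptions (see note above).
static void Pipeline(CudaPageLockedHostMemory2D_byte hostIn, CudaPitchedDeviceVariable<byte> devIn,
                     CudaPageLockedHostMemory2D_byte hostOut, CudaPitchedDeviceVariable<byte> devOut)
{
    var stream = new CudaStream();

    // Queue the upload ("Asynchron Copy host to pitched device").
    hostIn.AsynchronCopyToDevice(devIn, stream.Stream);

    // ... queue kernel launches on the same stream here ...

    // Queue the download of the results ("Asynchron copy device to host").
    hostOut.AsynchronCopyFromDevice(devOut, stream.Stream);

    // Block the CPU once, after all queued work has been issued.
    stream.Synchronize();
}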
@@ -33421,137 +33383,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -33559,14 +33521,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -33574,144 +33536,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_ushort3 + Enumerator class for CudaPageLockedHostMemory2D_char4 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: ushort4 + Type: short - + - Creates a new CudaPageLockedHostMemory2D_ushort4 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_short and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_short and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ushort4). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_short and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(short). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_ushort4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ushort4). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_short and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(short). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -33719,137 +33681,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -33857,14 +33819,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -33872,144 +33834,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_ushort4 + Enumerator class for CudaPageLockedHostMemory2D_short - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: int + Type: short1 - + - Creates a new CudaPageLockedHostMemory2D_int and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_short1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_int and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_short1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_int and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(int). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_short1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(short1). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_int and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(int). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_short1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(short1). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -34017,137 +33979,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -34155,14 +34117,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -34170,144 +34132,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_int + Enumerator class for CudaPageLockedHostMemory2D_short1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: int1 + Type: short2 - + - Creates a new CudaPageLockedHostMemory2D_int1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_short2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_int1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_short2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_int1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(int1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_short2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(short2). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_int1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(int1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_short2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(short2). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -34315,137 +34277,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -34453,14 +34415,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -34468,144 +34430,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_int1 + Enumerator class for CudaPageLockedHostMemory2D_short2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: int2 + Type: short3 - + - Creates a new CudaPageLockedHostMemory2D_int2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_short3 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_int2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_short3 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_int2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(int2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_short3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(short3). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_int2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(int2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_short3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(short3). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -34613,137 +34575,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -34751,14 +34713,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -34766,144 +34728,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_int2 + Enumerator class for CudaPageLockedHostMemory2D_short3 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: int3 + Type: short4 - + - Creates a new CudaPageLockedHostMemory2D_int3 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_short4 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_int3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_short4 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_int3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(int3). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_short4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(short4). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_int3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(int3). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_short4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(short4). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -34911,137 +34873,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -35049,14 +35011,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -35064,144 +35026,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_int3 + Enumerator class for CudaPageLockedHostMemory2D_short4 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: int4 + Type: ushort - + - Creates a new CudaPageLockedHostMemory2D_int4 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_ushort and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_int4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ushort and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_int4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(int4). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ushort and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ushort). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_int4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(int4). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_ushort and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ushort). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -35209,137 +35171,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -35347,14 +35309,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -35362,144 +35324,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_int4 + Enumerator class for CudaPageLockedHostMemory2D_ushort - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uint + Type: ushort1 - + - Creates a new CudaPageLockedHostMemory2D_uint and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_ushort1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_uint and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ushort1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_uint and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uint). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ushort1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ushort1). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_uint and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uint). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_ushort1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ushort1). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -35507,137 +35469,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -35645,14 +35607,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -35660,144 +35622,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_uint + Enumerator class for CudaPageLockedHostMemory2D_ushort1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uint1 + Type: ushort2 - + - Creates a new CudaPageLockedHostMemory2D_uint1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_ushort2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_uint1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ushort2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_uint1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uint1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ushort2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ushort2). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_uint1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uint1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_ushort2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ushort2). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -35805,137 +35767,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -35943,14 +35905,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -35958,144 +35920,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_uint1 + Enumerator class for CudaPageLockedHostMemory2D_ushort2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uint2 + Type: ushort3 - + - Creates a new CudaPageLockedHostMemory2D_uint2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_ushort3 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_uint2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ushort3 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_uint2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uint2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ushort3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ushort3). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_uint2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uint2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_ushort3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ushort3). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -36103,137 +36065,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -36241,14 +36203,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -36256,144 +36218,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_uint2 + Enumerator class for CudaPageLockedHostMemory2D_ushort3 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uint3 + Type: ushort4 - + - Creates a new CudaPageLockedHostMemory2D_uint3 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_ushort4 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_uint3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ushort4 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_uint3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uint3). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ushort4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ushort4). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_uint3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uint3). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_ushort4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ushort4). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -36401,137 +36363,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -36539,14 +36501,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -36554,144 +36516,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_uint3 + Enumerator class for CudaPageLockedHostMemory2D_ushort4 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uint4 + Type: int - + - Creates a new CudaPageLockedHostMemory2D_uint4 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_int and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_uint4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_int and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_uint4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uint4). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_int and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(int). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_uint4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uint4). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_int and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(int). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -36699,137 +36661,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -36837,14 +36799,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -36852,144 +36814,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_uint4 + Enumerator class for CudaPageLockedHostMemory2D_int - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: long + Type: int1 - + - Creates a new CudaPageLockedHostMemory2D_long and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_int1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_long and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_int1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_long and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(long). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_int1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(int1). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_long and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(long). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_int1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(int1). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -36997,137 +36959,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -37135,14 +37097,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -37150,144 +37112,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_long + Enumerator class for CudaPageLockedHostMemory2D_int1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: long1 + Type: int2 - + - Creates a new CudaPageLockedHostMemory2D_long1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_int2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_long1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_int2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_long1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(long1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_int2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(int2). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_long1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(long1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_int2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(int2). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -37295,137 +37257,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -37433,14 +37395,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -37448,144 +37410,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_long1 + Enumerator class for CudaPageLockedHostMemory2D_int2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: long2 + Type: int3 - + - Creates a new CudaPageLockedHostMemory2D_long2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_int3 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_long2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_int3 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_long2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(long2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_int3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(int3). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_long2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(long2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_int3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(int3). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -37593,137 +37555,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -37731,14 +37693,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -37746,144 +37708,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_long2 + Enumerator class for CudaPageLockedHostMemory2D_int3 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: ulong + Type: int4 - + - Creates a new CudaPageLockedHostMemory2D_ulong and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_int4 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ulong and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_int4 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ulong and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ulong). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_int4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(int4). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_ulong and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ulong). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_int4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(int4). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -37891,137 +37853,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -38029,14 +37991,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -38044,144 +38006,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_ulong + Enumerator class for CudaPageLockedHostMemory2D_int4 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: ulong1 + Type: uint - + - Creates a new CudaPageLockedHostMemory2D_ulong1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_uint and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uint and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ulong1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uint and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ulong1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_uint and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -38189,137 +38151,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -38327,14 +38289,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -38342,144 +38304,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_ulong1 + Enumerator class for CudaPageLockedHostMemory2D_uint - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: ulong2 + Type: uint1 - + - Creates a new CudaPageLockedHostMemory2D_ulong2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_uint1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uint1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ulong2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uint1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint1). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ulong2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_uint1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint1). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -38487,137 +38449,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -38625,14 +38587,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -38640,144 +38602,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_ulong2 + Enumerator class for CudaPageLockedHostMemory2D_uint1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: float + Type: uint2 - + - Creates a new CudaPageLockedHostMemory2D_float and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_uint2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_float and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uint2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_float and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uint2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint2). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_float and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_uint2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint2). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -38785,137 +38747,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -38923,14 +38885,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -38938,144 +38900,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_float + Enumerator class for CudaPageLockedHostMemory2D_uint2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: float1 + Type: uint3 - + - Creates a new CudaPageLockedHostMemory2D_float1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_uint3 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uint3 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uint3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint3). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_uint3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint3). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -39083,137 +39045,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -39221,14 +39183,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -39236,144 +39198,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_float1 + Enumerator class for CudaPageLockedHostMemory2D_uint3 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: float2 + Type: uint4 - + - Creates a new CudaPageLockedHostMemory2D_float2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_uint4 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uint4 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_uint4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint4). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_uint4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint4). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -39381,137 +39343,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -39519,14 +39481,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -39534,144 +39496,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_float2 + Enumerator class for CudaPageLockedHostMemory2D_uint4 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: float3 + Type: long - + - Creates a new CudaPageLockedHostMemory2D_float3 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_long and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_float3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_long and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_float3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float3). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_long and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_float3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float3). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_long and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -39679,137 +39641,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -39817,14 +39779,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -39832,144 +39794,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_float3 + Enumerator class for CudaPageLockedHostMemory2D_long - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: float4 + Type: long1 - + - Creates a new CudaPageLockedHostMemory2D_float4 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_long1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_long1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float4). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_long1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long1). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float4). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_long1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long1). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -39977,137 +39939,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -40115,14 +40077,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -40130,144 +40092,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_float4 + Enumerator class for CudaPageLockedHostMemory2D_long1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: double + Type: long2 - + - Creates a new CudaPageLockedHostMemory2D_double and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_long2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_double and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_long2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_double and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_long2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long2). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_double and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_long2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long2). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -40275,137 +40237,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -40413,14 +40375,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -40428,144 +40390,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_double + Enumerator class for CudaPageLockedHostMemory2D_long2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: double1 + Type: ulong - + - Creates a new CudaPageLockedHostMemory2D_double1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_ulong and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_double1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ulong and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_double1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ulong and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ulong). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_double1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_ulong and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ulong). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -40573,137 +40535,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -40711,14 +40673,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -40726,144 +40688,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_double1 + Enumerator class for CudaPageLockedHostMemory2D_ulong - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: double2 + Type: ulong1 - + - Creates a new CudaPageLockedHostMemory2D_double2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_ulong1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_double2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_double2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ulong1). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_double2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ulong1). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
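Each type-specific class repeats the same four constructors: (width, pitch, height) with or without cuMemHostAlloc flags, and (width, height) where the pitch defaults to width * sizeof(T). A short sketch of those overloads, assuming the flags parameter is ManagedCuda's CUMemHostAllocFlags and comes last:

using ManagedCuda;
using ManagedCuda.BasicTypes;

class PinnedAllocationOverloadsSketch
{
    static void Run()
    {
        using var ctx = new CudaContext(0);               // cuMemHostAlloc needs a current context
        int width = 512, height = 64;                     // in elements
        SizeT pitch = width * sizeof(double);             // bytes per row, no extra alignment

        // Explicit pitch vs. default pitch (width * sizeof(double)).
        using var explicitPitch = new CudaPageLockedHostMemory2D_double(width, pitch, height);
        using var defaultPitch  = new CudaPageLockedHostMemory2D_double(width, height);

        // Flags overload forwards to cuMemHostAlloc; WriteCombined is an assumed
        // CUMemHostAllocFlags member, useful for buffers the CPU only writes sequentially.
        using var writeCombined = new CudaPageLockedHostMemory2D_double(
            width, height, CUMemHostAllocFlags.WriteCombined);
    }
}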
@@ -40871,137 +40833,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -41009,14 +40971,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -41024,144 +40986,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_double2 + Enumerator class for CudaPageLockedHostMemory2D_ulong1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: cuDoubleComplex + Type: ulong2 - + - Creates a new CudaPageLockedHostMemory2D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_ulong2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuDoubleComplex). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ulong2). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuDoubleComplex). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ulong2). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -41169,137 +41131,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -41307,14 +41269,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -41322,144 +41284,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_cuDoubleComplex + Enumerator class for CudaPageLockedHostMemory2D_ulong2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: cuDoubleReal + Type: float - + - Creates a new CudaPageLockedHostMemory2D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_float and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_float and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuDoubleReal). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_float and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuDoubleReal). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_float and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
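The "Asynchron" members are the reason these buffers exist: page-locked memory can be the source or target of cuMemcpy*Async, so transfers can overlap kernel execution on a stream. A sketch of that pattern follows; AsynchronCopyToDevice / AsynchronCopyFromDevice and their (device, stream) parameter order are assumptions inferred from the summaries above.

using ManagedCuda;

class AsyncPinnedCopySketch
{
    static void Run()
    {
        const int width = 1024, height = 256;
        using var ctx    = new CudaContext(0);                 // context required before allocating pinned memory
        using var stream = new CudaStream();
        using var host   = new CudaPageLockedHostMemory2D_float(width, height);
        using var dev    = new CudaPitchedDeviceVariable<float>(width, height);

        host.AsynchronCopyToDevice(dev, stream.Stream);        // enqueue H2D copy ("Asynchron Copy host to pitched device")
        // ... launch kernels on the same stream here ...
        host.AsynchronCopyFromDevice(dev, stream.Stream);      // enqueue D2H copy (member name assumed)
        stream.Synchronize();                                   // wait for the queued work to finish
    }
}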
@@ -41467,137 +41429,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -41605,14 +41567,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -41620,144 +41582,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_cuDoubleReal + Enumerator class for CudaPageLockedHostMemory2D_float - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: cuFloatComplex + Type: float1 - + - Creates a new CudaPageLockedHostMemory2D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_float1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuFloatComplex). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float1). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuFloatComplex). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float1). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -41765,137 +41727,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -41903,14 +41865,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -41918,144 +41880,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_cuFloatComplex + Enumerator class for CudaPageLockedHostMemory2D_float1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: cuFloatReal + Type: float2 - + - Creates a new CudaPageLockedHostMemory2D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_float2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuFloatReal). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float2). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuFloatReal). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float2). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -42063,137 +42025,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -42201,14 +42163,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -42216,144 +42178,144 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_cuFloatReal + Enumerator class for CudaPageLockedHostMemory2D_float2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: dim3 + Type: float3 - + - Creates a new CudaPageLockedHostMemory2D_dim3 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_float3 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_dim3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_float3 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - + - Creates a new CudaPageLockedHostMemory2D_dim3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(dim3). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_float3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float3). Using cuMemHostAlloc without flags. In elements In elements - + - Creates a new CudaPageLockedHostMemory2D_dim3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(dim3). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_float3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float3). Using cuMemHostAlloc. In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
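The member documented as "Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag ..." describes the zero-copy path: instead of copying, the pinned buffer itself is made visible to the GPU. A hedged sketch, where GetDevicePointer(), CUCtxFlags.MapHost and CUMemHostAllocFlags.DeviceMap are assumed names for the method and flags the summary alludes to:

using System;
using ManagedCuda;
using ManagedCuda.BasicTypes;

class MappedPinnedMemorySketch
{
    static void Run()
    {
        // The context must be created with the map-host flag for the device pointer to be valid.
        using var ctx = new CudaContext(0, CUCtxFlags.MapHost);

        const int width = 64, height = 64;
        using var host = new CudaPageLockedHostMemory2D_float(
            width, height, CUMemHostAllocFlags.DeviceMap);      // allocation flag assumed

        CUdeviceptr devPtr = host.GetDevicePointer();           // pass to a kernel instead of copying
        Console.WriteLine("Mapped device pointer: " + devPtr.Pointer);
    }
}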
@@ -42361,137 +42323,137 @@ Y-index in elements - + Synchron copy host to 2D Array - + Synchron copy host to 2D Array - + Synchron copy 2D Array to host - + Synchron copy 2D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron Copy host to pitched device - + Synchron Copy host to pitched device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 2D Array - + Asynchron copy host to 2D Array - + Asynchron copy 2D Array to host - + Asynchron copy 2D Array to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron Copy host to pitched device @@ -42499,14 +42461,14 @@ - + Asynchron Copy host to pitched device - + Asynchron copy device to host @@ -42514,2324 +42476,2747 @@ - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory2D_dim3 + Enumerator class for CudaPageLockedHostMemory2D_float3 - + - + - + - + - + - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + Type: float4 - + - Creates a new CudaPageLockedHostMemory3D_byte and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_float4 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_byte and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_byte and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(byte). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float4). Using cuMemHostAlloc without flags. In elements In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_byte and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(byte). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float4). Using cuMemHostAlloc. In elements In elements - In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - - - Depth in elements - - - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
X-index in elements Y-index in elements - Z-index in elements - + + + Synchron copy host to 2D Array + + + + + + Synchron copy host to 2D Array + + + + + + Synchron copy 2D Array to host + + + + + + Synchron copy 2D Array to host + + + + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + - Synchron copy host to 3D array + Synchron Copy host to pitched device + + + + + + + Synchron Copy host to pitched device + + + + + + Synchron copy device to host + + + + + + + Synchron copy device to host + + + + + + Asynchron copy host to 2D Array + - + - Synchron copy host to 3D Array + Asynchron copy host to 2D Array + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + Asynchron Copy host to device - + - Asynchron Copy host to device + Asynchron copy device to host - + - + - Asynchron copy device to host + Asynchron Copy host to device - + Asynchron copy device to host - + - + - Asynchron copy host to 3D array + Asynchron Copy host to pitched device - + + - + - Asynchron copy host to 3D Array + Asynchron Copy host to pitched device - + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_byte + Enumerator class for CudaPageLockedHostMemory2D_float4 - + - + - + - + - + - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + Type: double - + - Creates a new CudaPageLockedHostMemory3D_uchar1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_double and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_double and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_double and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(double). Using cuMemHostAlloc without flags. In elements In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_double and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(double). Using cuMemHostAlloc. In elements In elements - In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. 
- + Width in elements - + Height in elements - - - Depth in elements - - - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. X-index in elements Y-index in elements - Z-index in elements - + + + Synchron copy host to 2D Array + + + + + + Synchron copy host to 2D Array + + + + + + Synchron copy 2D Array to host + + + + + + Synchron copy 2D Array to host + + + + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + - Synchron copy host to 3D array + Synchron Copy host to pitched device - + + - + - Synchron copy host to 3D Array + Synchron Copy host to pitched device - + - + - Synchron copy 3D Array to host + Synchron copy device to host - + + - + - Synchron copy 3D Array to host + Synchron copy device to host + + + + + + Asynchron copy host to 2D Array + + + + + + + Asynchron copy host to 2D Array + - + - Asynchron Copy host to device + Asynchron copy 2D Array to host - + - + + + Asynchron copy 2D Array to host + + + + + Asynchron Copy host to device - + - + Asynchron copy device to host - + + + Asynchron Copy host to device + + + + + Asynchron copy device to host - + - + - Asynchron copy host to 3D array + Asynchron Copy host to pitched device - + + - + - Asynchron copy host to 3D Array + Asynchron Copy host to pitched device - + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_uchar1 + Enumerator class for CudaPageLockedHostMemory2D_double - + - + - + - + - + - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + Type: double1 - + - Creates a new CudaPageLockedHostMemory3D_uchar2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_double1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_double1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_double1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(double1). Using cuMemHostAlloc without flags. In elements In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_double1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(double1). 
Using cuMemHostAlloc. In elements In elements - In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - - - Depth in elements - - - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. X-index in elements Y-index in elements - Z-index in elements - + + + Synchron copy host to 2D Array + + + + + + Synchron copy host to 2D Array + + + + + + Synchron copy 2D Array to host + + + + + + Synchron copy 2D Array to host + + + + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + - Synchron copy host to 3D array + Synchron Copy host to pitched device + + + + + + + Synchron Copy host to pitched device + + + + + + Synchron copy device to host + + + + + + + Synchron copy device to host + + + + + + Asynchron copy host to 2D Array + - + - Synchron copy host to 3D Array + Asynchron copy host to 2D Array + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + Asynchron Copy host to device - + - Asynchron Copy host to device + Asynchron copy device to host - + - + - Asynchron copy device to host + Asynchron Copy host to device - + Asynchron copy device to host - + - + - Asynchron copy host to 3D array + Asynchron Copy host to pitched device - + + - + - Asynchron copy host to 3D Array + Asynchron Copy host to pitched device - + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_uchar2 + Enumerator class for CudaPageLockedHostMemory2D_double1 - + - + - + - + - + - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + Type: double2 - + - Creates a new CudaPageLockedHostMemory3D_uchar3 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_double2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_double2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar3). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_double2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(double2). Using cuMemHostAlloc without flags. In elements In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar3). Using cuMemHostAlloc. 
+ Creates a new CudaPageLockedHostMemory2D_double2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(double2). Using cuMemHostAlloc. In elements In elements - In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - - - Depth in elements - - - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. X-index in elements Y-index in elements - Z-index in elements - + + + Synchron copy host to 2D Array + + + + + + Synchron copy host to 2D Array + + + + + + Synchron copy 2D Array to host + + + + + + Synchron copy 2D Array to host + + + + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + - Synchron copy host to 3D array + Synchron Copy host to pitched device + + + + + + + Synchron Copy host to pitched device + + + + + + Synchron copy device to host + + + + + + + Synchron copy device to host + + + + + + Asynchron copy host to 2D Array + - + - Synchron copy host to 3D Array + Asynchron copy host to 2D Array + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + Asynchron Copy host to device - + - Asynchron Copy host to device + Asynchron copy device to host - + - + - Asynchron copy device to host + Asynchron Copy host to device - + Asynchron copy device to host - + - + - Asynchron copy host to 3D array + Asynchron Copy host to pitched device - + + - + - Asynchron copy host to 3D Array + Asynchron Copy host to pitched device - + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_uchar3 + Enumerator class for CudaPageLockedHostMemory2D_double2 - + - + - + - + - + - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + Type: cuDoubleComplex - + - Creates a new CudaPageLockedHostMemory3D_uchar4 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar4). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(cuDoubleComplex). Using cuMemHostAlloc without flags. 
In elements In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_uchar4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(uchar4). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(cuDoubleComplex). Using cuMemHostAlloc. In elements In elements - In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - - - Depth in elements - - - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. X-index in elements Y-index in elements - Z-index in elements - + + + Synchron copy host to 2D Array + + + + + + Synchron copy host to 2D Array + + + + + + Synchron copy 2D Array to host + + + + + + Synchron copy 2D Array to host + + + + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + - Synchron copy host to 3D array + Synchron Copy host to pitched device + + + + + + + Synchron Copy host to pitched device + + + + + + Synchron copy device to host + + + + + + + Synchron copy device to host + + + + + + Asynchron copy host to 2D Array + - + - Synchron copy host to 3D Array + Asynchron copy host to 2D Array + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + Asynchron Copy host to device - + - Asynchron Copy host to device + Asynchron copy device to host - + - + - Asynchron copy device to host + Asynchron Copy host to device - + Asynchron copy device to host - + - + - Asynchron copy host to 3D array + Asynchron Copy host to pitched device - + + - + - Asynchron copy host to 3D Array + Asynchron Copy host to pitched device - + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_uchar4 + Enumerator class for CudaPageLockedHostMemory2D_cuDoubleComplex - + - + - + - + - + - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + Type: cuDoubleReal - + - Creates a new CudaPageLockedHostMemory3D_sbyte and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_sbyte and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_sbyte and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(sbyte). Using cuMemHostAlloc without flags. 
+ Creates a new CudaPageLockedHostMemory2D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(cuDoubleReal). Using cuMemHostAlloc without flags. In elements In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_sbyte and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(sbyte). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(cuDoubleReal). Using cuMemHostAlloc. In elements In elements - In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - - - Depth in elements - - - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. X-index in elements Y-index in elements - Z-index in elements - + + + Synchron copy host to 2D Array + + + + + + Synchron copy host to 2D Array + + + + + + Synchron copy 2D Array to host + + + + + + Synchron copy 2D Array to host + + + + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + - Synchron copy host to 3D array + Synchron Copy host to pitched device + + + + + + + Synchron Copy host to pitched device + + + + + + Synchron copy device to host + + + + + + + Synchron copy device to host + + + + + + Asynchron copy host to 2D Array + - + - Synchron copy host to 3D Array + Asynchron copy host to 2D Array + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + Asynchron Copy host to device - + - Asynchron Copy host to device + Asynchron copy device to host - + - + - Asynchron copy device to host + Asynchron Copy host to device - + Asynchron copy device to host - + - + - Asynchron copy host to 3D array + Asynchron Copy host to pitched device - + + - + - Asynchron copy host to 3D Array + Asynchron Copy host to pitched device - + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_sbyte + Enumerator class for CudaPageLockedHostMemory2D_cuDoubleReal - + - + - + - + - + - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + Type: cuFloatComplex - + - Creates a new CudaPageLockedHostMemory3D_char1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_char1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc without flags. 
In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_char1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(cuFloatComplex). Using cuMemHostAlloc without flags. In elements In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_char1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(cuFloatComplex). Using cuMemHostAlloc. In elements In elements - In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - - - Depth in elements - - - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. X-index in elements Y-index in elements - Z-index in elements - + - Synchron copy host to device + Synchron copy host to 2D Array - + - + - Synchron copy host to device + Synchron copy host to 2D Array - + - + - Synchron copy device to host + Synchron copy 2D Array to host - + - + - Synchron copy device to host + Synchron copy 2D Array to host - + - + - Synchron copy host to 3D array + Synchron copy host to device - + - + - Synchron copy host to 3D Array + Synchron copy host to device - + - + - Synchron copy 3D Array to host + Synchron copy device to host - + - + - Synchron copy 3D Array to host + Synchron copy device to host - + - + - Asynchron Copy host to device + Synchron Copy host to pitched device - + - + - Asynchron Copy host to device + Synchron Copy host to pitched device - - + - Asynchron copy device to host + Synchron copy device to host - + - + - Asynchron copy device to host + Synchron copy device to host - - + - Asynchron copy host to 3D array + Asynchron copy host to 2D Array - + - Asynchron copy host to 3D Array + Asynchron copy host to 2D Array - + - Asynchron copy 3D Array to host + Asynchron copy 2D Array to host - + - Asynchron copy 3D Array to host + Asynchron copy 2D Array to host - + - Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag + Asynchron Copy host to device - Device Pointer + + - + - Passes back the flags that were specified when allocating the pinned host buffer + Asynchron copy device to host - + + - + - Enumerator class for CudaPageLockedHostMemory3D_char1 + Asynchron Copy host to device + + - + + + Asynchron copy device to host + + + + + + + Asynchron Copy host to pitched device + + + + + + + + Asynchron Copy host to pitched device + + + + + + + Asynchron copy device to host + + + + + + + + Asynchron copy device to host + + + + + + + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag + + Device Pointer + + + + Passes back the flags that were specified when allocating the pinned host buffer + + + + + + Enumerator class for CudaPageLockedHostMemory2D_cuFloatComplex + + + - + - + - + - + - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. 
+ A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + Type: cuFloatReal - + - Creates a new CudaPageLockedHostMemory3D_char2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_char2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_char2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(cuFloatReal). Using cuMemHostAlloc without flags. In elements In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_char2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(cuFloatReal). Using cuMemHostAlloc. In elements In elements - In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - - - Depth in elements - - - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. X-index in elements Y-index in elements - Z-index in elements - + + + Synchron copy host to 2D Array + + + + + + Synchron copy host to 2D Array + + + + + + Synchron copy 2D Array to host + + + + + + Synchron copy 2D Array to host + + + + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + - Synchron copy host to 3D array + Synchron Copy host to pitched device + + + + + + + Synchron Copy host to pitched device + + + + + + Synchron copy device to host + + + + + + + Synchron copy device to host + + + + + + Asynchron copy host to 2D Array + - + - Synchron copy host to 3D Array + Asynchron copy host to 2D Array + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + - Synchron copy 3D Array to host + Asynchron copy 2D Array to host + - + Asynchron Copy host to device - + - Asynchron Copy host to device + Asynchron copy device to host - + - + - Asynchron copy device to host + Asynchron Copy host to device - + Asynchron copy device to host - + - + - Asynchron copy host to 3D array + Asynchron Copy host to pitched device - + + - + - Asynchron copy host to 3D Array + Asynchron Copy host to pitched device - + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + + - + - Asynchron copy 3D Array to host + Asynchron copy device to host - + - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. 
Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_char2 + Enumerator class for CudaPageLockedHostMemory2D_cuFloatReal - + - + - + - + - + - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + Type: dim3 - + - Creates a new CudaPageLockedHostMemory3D_char3 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory2D_dim3 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_char3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_dim3 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_char3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char3). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory2D_dim3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(dim3). Using cuMemHostAlloc without flags. In elements In elements - In elements - + - Creates a new CudaPageLockedHostMemory3D_char3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(char3). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory2D_dim3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(dim3). Using cuMemHostAlloc. In elements In elements - In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - - - Depth in elements - - - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
[Auto-generated hunk condensed: the rest of this diff regenerates the bundled ManagedCuda XML documentation for the page-locked (pinned) host memory wrappers, CudaPageLockedHostMemory2D_* and CudaPageLockedHostMemory3D_*, across the element types (byte, sbyte, char1–char4, short, short1–short4, ushort, ushort1–ushort4, int, int1–int4, uint, uint1–uint4, long, long1, long2, ulong, dim3, …). The removed and added blocks carry identical boilerplate and differ only in the element type named: constructors that allocate via cuMemHostAlloc (with or without flags, with an explicit pitch or with the pitch assumed to be width * sizeof(T)); Dispose/IDisposable members; the Width, Height, Depth, Pitch, SizeInBytes and TypeSize properties; a per-element indexer (X/Y/Z indices in elements); synchronous and asynchronous copies between host, device and 2D/3D CUDA arrays; the mapped CUdeviceptr (valid only when the context was created with the device-mapping flag); the allocation-flags query; and the per-type enumerator classes.]
Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_ulong + Enumerator class for CudaPageLockedHostMemory3D_int4 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_ulong1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_uint and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -51114,86 +51499,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_uint and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ulong1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_uint and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ulong1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_uint and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -51202,162 +51587,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_ulong1 + Enumerator class for CudaPageLockedHostMemory3D_uint - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_ulong2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_uint1 and allocates the memory on host. 
Using cuMemHostAlloc In elements Width including alignment in bytes @@ -51365,86 +51750,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_uint1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ulong2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_uint1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint1). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(ulong2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_uint1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint1). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -51453,162 +51838,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_ulong2 + Enumerator class for CudaPageLockedHostMemory3D_uint1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_float and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_uint2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -51616,86 +52001,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_float and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_uint2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_float and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_uint2 and allocates the memory on host. Using cuMemHostAlloc without flags. 
+ Pitch is assumed to be width * sizeof(uint2). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_float and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_uint2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint2). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -51704,162 +52089,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_float + Enumerator class for CudaPageLockedHostMemory3D_uint2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_float1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_uint3 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -51867,86 +52252,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_uint3 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_uint3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint3). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_uint3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint3). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. 
- + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -51955,162 +52340,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_float1 + Enumerator class for CudaPageLockedHostMemory3D_uint3 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_float2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_uint4 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -52118,86 +52503,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_uint4 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_uint4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint4). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_uint4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(uint4). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -52206,162 +52591,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_float2 + Enumerator class for CudaPageLockedHostMemory3D_uint4 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_float3 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_long and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -52369,86 +52754,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_float3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_long and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_float3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float3). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_long and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_float3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float3). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_long and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -52457,162 +52842,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. 
Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_float3 + Enumerator class for CudaPageLockedHostMemory3D_long - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_float4 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_long1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -52620,86 +53005,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_long1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float4). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_long1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long1). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(float4). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_long1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long1). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -52708,162 +53093,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_float4 + Enumerator class for CudaPageLockedHostMemory3D_long1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_double and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_long2 and allocates the memory on host. 
Using cuMemHostAlloc In elements Width including alignment in bytes @@ -52871,86 +53256,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_double and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_long2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_double and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_long2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long2). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_double and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_long2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(long2). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -52959,162 +53344,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_double + Enumerator class for CudaPageLockedHostMemory3D_long2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_double1 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_ulong and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -53122,86 +53507,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_double1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_ulong and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_double1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double1). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_ulong and allocates the memory on host. Using cuMemHostAlloc without flags. 
+ Pitch is assumed to be width * sizeof(ulong). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_double1 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double1). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_ulong and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ulong). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -53210,162 +53595,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_double1 + Enumerator class for CudaPageLockedHostMemory3D_ulong - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_double2 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_ulong1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -53373,86 +53758,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_double2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_double2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double2). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ulong1). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_double2 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(double2). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_ulong1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ulong1). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. 
- + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -53461,162 +53846,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_double2 + Enumerator class for CudaPageLockedHostMemory3D_ulong1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_ulong2 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -53624,86 +54009,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuDoubleComplex). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ulong2). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_cuDoubleComplex and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuDoubleComplex). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_ulong2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(ulong2). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. 
@@ -53712,162 +54097,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_cuDoubleComplex + Enumerator class for CudaPageLockedHostMemory3D_ulong2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_float and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -53875,86 +54260,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_float and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuDoubleReal). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_float and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_cuDoubleReal and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuDoubleReal). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_float and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -53963,162 +54348,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. 
Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_cuDoubleReal + Enumerator class for CudaPageLockedHostMemory3D_float - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_float1 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -54126,86 +54511,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuFloatComplex). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float1). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_cuFloatComplex and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuFloatComplex). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_float1 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float1). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -54214,162 +54599,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_cuFloatComplex + Enumerator class for CudaPageLockedHostMemory3D_float1 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_float2 and allocates the memory on host. 
Using cuMemHostAlloc In elements Width including alignment in bytes @@ -54377,86 +54762,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuFloatReal). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float2). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_cuFloatReal and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(cuFloatReal). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_float2 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float2). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -54465,162 +54850,162 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_cuFloatReal + Enumerator class for CudaPageLockedHostMemory3D_float2 - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory3D_dim3 and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_float3 and allocates the memory on host. Using cuMemHostAlloc In elements Width including alignment in bytes @@ -54628,86 +55013,86 @@ In elements - + - Creates a new CudaPageLockedHostMemory3D_dim3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_float3 and allocates the memory on host. Using cuMemHostAlloc without flags. In elements Width including alignment in bytes In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_dim3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(dim3). Using cuMemHostAlloc without flags. + Creates a new CudaPageLockedHostMemory3D_float3 and allocates the memory on host. 
Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float3). Using cuMemHostAlloc without flags. In elements In elements In elements - + - Creates a new CudaPageLockedHostMemory3D_dim3 and allocates the memory on host. Using cuMemHostAlloc without flags. - Pitch is assumed to be width * sizeof(dim3). Using cuMemHostAlloc. + Creates a new CudaPageLockedHostMemory3D_float3 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float3). Using cuMemHostAlloc. In elements In elements In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Width in elements - + Height in elements - + Depth in elements - + Pitch in bytes - + Size in bytes - + Type size in bytes - + Access array per element. @@ -54716,3360 +55101,2433 @@ Z-index in elements - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to 3D array - + Synchron copy host to 3D Array - + Synchron copy 3D Array to host - + Synchron copy 3D Array to host - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron copy host to 3D array - + Asynchron copy host to 3D Array - + Asynchron copy 3D Array to host - + Asynchron copy 3D Array to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory3D_dim3 + Enumerator class for CudaPageLockedHostMemory3D_float3 - + - + - + - + - + - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: byte + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc + Creates a new CudaPageLockedHostMemory3D_float4 and allocates the memory on host. Using cuMemHostAlloc - In elements + In elements + Width including alignment in bytes + In elements + In elements - + - Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost + Creates a new CudaPageLockedHostMemory3D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. - In elements + In elements + Width including alignment in bytes + In elements + In elements - + - Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! - hostPointer won't be freed while disposing. + Creates a new CudaPageLockedHostMemory3D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float4). Using cuMemHostAlloc without flags. - - In elements + In elements + In elements + In elements - + + + Creates a new CudaPageLockedHostMemory3D_float4 and allocates the memory on host. Using cuMemHostAlloc without flags. + Pitch is assumed to be width * sizeof(float4). Using cuMemHostAlloc. + + In elements + In elements + In elements + + + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - - - Size in bytes - - - - - Size in elements - - - - - Access array per element. 
- - index in elements - - - - - If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. - - - - - Synchron copy host to 1D Array - - - - - + - Synchron copy host to 1D Array + Width in elements - - + - Synchron copy host to 1D Array + Height in elements - - + - Synchron copy host to 1D Array + Depth in elements - - - + - Synchron copy 1D Array to host + Pitch in bytes - - - + - Synchron copy 1D Array to host + Size in bytes - - + - Synchron copy 1D Array to host + Type size in bytes - - + - Synchron copy 1D Array to host + Access array per element. - - + X-index in elements + Y-index in elements + Z-index in elements + - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - - - Synchron copy host to device - - Pointer to device memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Bytes to copy - - - - Synchron copy host to device - - Pointer to device memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Bytes to copy - - - - Synchron copy device to host - - Pointer to device memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Bytes to copy - - - - Synchron copy device to host - - Pointer to device memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Bytes to copy - - - - Asynchron copy host to 1D Array - - - - in bytes - - + - Asynchron copy host to 1D Array + Synchron copy host to 3D array - - - - - Asynchron copy host to 1D Array - - - - + - Asynchron copy host to 1D Array + Synchron copy host to 3D Array - - in bytes - - - - Asynchron copy 1D Array to host - - - - bytes - + - Asynchron copy 1D Array to host + Synchron copy 3D Array to host - - - - - Asynchron copy 1D Array to host - - - - + - Asynchron copy 1D Array to host + Synchron copy 3D Array to host - - bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + - Asynchron Copy host to device + Asynchron copy host to 3D array - Pointer to device memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Bytes to copy + - + - Asynchron Copy host to device + Asynchron copy host to 3D Array - - Offset to source pointer in bytes - Offset to destination pointer in bytes - Bytes to copy + - + - Asynchron copy device to host + Asynchron copy 3D Array to host - Pointer to device memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Bytes to copy + - + - Asynchron copy device to host + Asynchron copy 3D Array to host - - Offset to source pointer in bytes - Offset to destination pointer in bytes - Bytes to copy + - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + - Enumerator class for CudaPageLockedHostMemory + Enumerator class for CudaPageLockedHostMemory3D_float4 - + - + - + - + - + - + - A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uchar1 + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - + - Creates a new CudaPageLockedHostMemory and allocates the memory on host. 
Generated XML documentation hunks for the CUDA page-locked host memory wrappers (documented content only):

Removed at these positions: the entries for the per-type 1D CudaPageLockedHostMemory classes (uchar2, uchar3, uchar4, sbyte, char1 through char4 in this stretch). Each documented constructors via cuMemHostAlloc, via cuMemAllocHost, and from an existing IntPtr that must point to page-locked memory (the pointer is not freed on dispose); Size in bytes and in elements; a per-element indexer; the note that an owned CUDA handle is destroyed on dispose; synchronous and asynchronous copies between host memory, 1D CUDA arrays and device memory, including overloads taking a device pointer, source and destination offsets in bytes and a byte count; the CUdeviceptr for pinned host memory mapped into device space (only valid if the context was created with the corresponding flag); the query for the allocation flags; and the per-type enumerator class.

Added in their place: entries for the typed 3D wrappers CudaPageLockedHostMemory3D_double, _double1, _double2, _cuDoubleComplex, _cuDoubleReal, _cuFloatComplex, _cuFloatReal and _dim3, each described as a variable located in page-locked (pinned) host memory intended for asynchronous memcpy. Every type documents: constructors using cuMemHostAlloc with and without flags, taking width, height and depth in elements plus either an explicit pitch (width including alignment, in bytes) or a pitch assumed to be width * sizeof(element type); Dispose/IDisposable plumbing; the pointer to the pinned host memory; Width, Height and Depth in elements; Pitch, Size and type size in bytes; an (x, y, z) element indexer; synchronous and asynchronous copies host to device, device to host, and host to and from a 3D CUDA array; the mapped CUdeviceptr (only valid if the context was created with the corresponding flag); the allocation-flag query; and an enumerator class.
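To make the added 3D entries concrete, here is a minimal usage sketch in C#. It assumes the class and member names suggested by the summaries above (CudaPageLockedHostMemory3D_double with a width/height/depth constructor, Width/Height/Depth/Pitch/Size properties, an (x, y, z) indexer and SynchronCopyToDevice/SynchronCopyFromDevice overloads taking a raw device pointer); treat the exact signatures as assumptions about the ManagedCuda surface, not a verified API reference.

using System;
using ManagedCuda;

internal static class PinnedMemory3DSketch
{
    public static void Run()
    {
        const int width = 64, height = 32, depth = 4;        // extents in elements

        using var ctx = new CudaContext(0);                   // owns the CUDA context on device 0

        // 3-argument constructor from the entries above: pitch is assumed to be
        // width * sizeof(double), allocation goes through cuMemHostAlloc.
        using var host = new CudaPageLockedHostMemory3D_double(width, height, depth);

        // Fill the pinned buffer through the documented (x, y, z) indexer.
        for (int z = 0; z < depth; z++)
            for (int y = 0; y < height; y++)
                for (int x = 0; x < width; x++)
                    host[x, y, z] = x + 10.0 * y + 100.0 * z;

        // Plain device allocation sized from the pitched host layout (Size is in bytes).
        var devicePtr = ctx.AllocateMemory(host.Size);

        // Synchronous host -> device and device -> host copies as documented above
        // (assumed overloads taking a CUdeviceptr; the asynchronous variants take a stream).
        host.SynchronCopyToDevice(devicePtr);
        host.SynchronCopyFromDevice(devicePtr);

        Console.WriteLine($"pitch = {host.Pitch} bytes, size = {host.Size} bytes, host[1,2,3] = {host[1, 2, 3]}");

        ctx.FreeMemory(devicePtr);
    }
}

The asynchronous copy methods documented above additionally take a stream, which is the main reason to allocate the buffer as page-locked memory in the first place: the transfers can then overlap with kernels queued on the same stream.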
The remaining hunks in this documentation file (e.g. @@ -58077,126 +57535,126 @@ and the blocks that follow) only shift the unchanged 1D CudaPageLockedHostMemory entries to new line numbers. Every block keeps the same summaries: constructors via cuMemHostAlloc, cuMemAllocHost or an existing page-locked IntPtr, Size in bytes and in elements, the per-element indexer, synchronous and asynchronous host ↔ 1D CUDA array and host ↔ device copies with offset and byte-count overloads, the mapped CUdeviceptr, the allocation flags and the enumerator class. Only the per-type header line at each shifted position changes (Type: char4 to byte, short to uchar1, short1 to uchar2, short2 to uchar3, short3 to uchar4, short4 to sbyte, ushort to char1, ushort1 to char2, ushort2 to char3, ushort3 to char4, ushort4 to short).
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -61745,7 +61203,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -61754,7 +61212,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -61763,7 +61221,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -61772,7 +61230,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -61780,21 +61238,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -61802,7 +61260,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -61810,21 +61268,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -61832,35 +61290,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -61870,7 +61328,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -61880,7 +61338,7 @@ Bytes to copy - + Asynchron copy device to host @@ -61890,7 +61348,7 @@ Bytes to copy - + Asynchron copy device to host @@ -61900,70 +61358,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: int + Type: short1 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -61971,126 +61429,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -62099,7 +61557,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -62108,7 +61566,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -62117,7 +61575,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -62126,7 +61584,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -62134,21 +61592,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -62156,7 +61614,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -62164,21 +61622,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -62186,35 +61644,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -62224,7 +61682,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -62234,7 +61692,7 @@ Bytes to copy - + Asynchron copy device to host @@ -62244,7 +61702,7 @@ Bytes to copy - + Asynchron copy device to host @@ -62254,70 +61712,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: int1 + Type: short2 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -62325,126 +61783,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -62453,7 +61911,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -62462,7 +61920,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -62471,7 +61929,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -62480,7 +61938,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -62488,21 +61946,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -62510,7 +61968,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -62518,21 +61976,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -62540,35 +61998,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -62578,7 +62036,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -62588,7 +62046,7 @@ Bytes to copy - + Asynchron copy device to host @@ -62598,7 +62056,7 @@ Bytes to copy - + Asynchron copy device to host @@ -62608,70 +62066,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: int2 + Type: short3 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -62679,126 +62137,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -62807,7 +62265,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -62816,7 +62274,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -62825,7 +62283,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -62834,7 +62292,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -62842,21 +62300,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -62864,7 +62322,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -62872,21 +62330,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -62894,35 +62352,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -62932,7 +62390,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -62942,7 +62400,7 @@ Bytes to copy - + Asynchron copy device to host @@ -62952,7 +62410,7 @@ Bytes to copy - + Asynchron copy device to host @@ -62962,70 +62420,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: int3 + Type: short4 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -63033,126 +62491,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -63161,7 +62619,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -63170,7 +62628,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -63179,7 +62637,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -63188,7 +62646,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -63196,21 +62654,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -63218,7 +62676,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -63226,21 +62684,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -63248,35 +62706,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -63286,7 +62744,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -63296,7 +62754,7 @@ Bytes to copy - + Asynchron copy device to host @@ -63306,7 +62764,7 @@ Bytes to copy - + Asynchron copy device to host @@ -63316,70 +62774,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: int4 + Type: ushort - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -63387,126 +62845,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -63515,7 +62973,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -63524,7 +62982,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -63533,7 +62991,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -63542,7 +63000,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -63550,21 +63008,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -63572,7 +63030,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -63580,21 +63038,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -63602,35 +63060,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -63640,7 +63098,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -63650,7 +63108,7 @@ Bytes to copy - + Asynchron copy device to host @@ -63660,7 +63118,7 @@ Bytes to copy - + Asynchron copy device to host @@ -63670,70 +63128,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uint + Type: ushort1 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -63741,126 +63199,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -63869,7 +63327,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -63878,7 +63336,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -63887,7 +63345,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -63896,7 +63354,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -63904,21 +63362,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -63926,7 +63384,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -63934,21 +63392,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -63956,35 +63414,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -63994,7 +63452,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -64004,7 +63462,7 @@ Bytes to copy - + Asynchron copy device to host @@ -64014,7 +63472,7 @@ Bytes to copy - + Asynchron copy device to host @@ -64024,70 +63482,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uint1 + Type: ushort2 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -64095,126 +63553,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -64223,7 +63681,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -64232,7 +63690,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -64241,7 +63699,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -64250,7 +63708,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -64258,21 +63716,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -64280,7 +63738,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -64288,21 +63746,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -64310,35 +63768,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -64348,7 +63806,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -64358,7 +63816,7 @@ Bytes to copy - + Asynchron copy device to host @@ -64368,7 +63826,7 @@ Bytes to copy - + Asynchron copy device to host @@ -64378,70 +63836,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uint2 + Type: ushort3 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -64449,126 +63907,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -64577,7 +64035,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -64586,7 +64044,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -64595,7 +64053,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -64604,7 +64062,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -64612,21 +64070,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -64634,7 +64092,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -64642,21 +64100,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -64664,35 +64122,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -64702,7 +64160,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -64712,7 +64170,7 @@ Bytes to copy - + Asynchron copy device to host @@ -64722,7 +64180,7 @@ Bytes to copy - + Asynchron copy device to host @@ -64732,70 +64190,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uint3 + Type: ushort4 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -64803,126 +64261,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -64931,7 +64389,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -64940,7 +64398,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -64949,7 +64407,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -64958,7 +64416,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -64966,21 +64424,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -64988,7 +64446,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -64996,21 +64454,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -65018,35 +64476,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -65056,7 +64514,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -65066,7 +64524,7 @@ Bytes to copy - + Asynchron copy device to host @@ -65076,7 +64534,7 @@ Bytes to copy - + Asynchron copy device to host @@ -65086,70 +64544,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: uint4 + Type: int - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -65157,126 +64615,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -65285,7 +64743,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -65294,7 +64752,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -65303,7 +64761,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -65312,7 +64770,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -65320,21 +64778,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -65342,7 +64800,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -65350,21 +64808,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -65372,35 +64830,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -65410,7 +64868,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -65420,7 +64878,7 @@ Bytes to copy - + Asynchron copy device to host @@ -65430,7 +64888,7 @@ Bytes to copy - + Asynchron copy device to host @@ -65440,70 +64898,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: long + Type: int1 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -65511,126 +64969,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -65639,7 +65097,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -65648,7 +65106,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -65657,7 +65115,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -65666,7 +65124,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -65674,21 +65132,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -65696,7 +65154,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -65704,21 +65162,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -65726,35 +65184,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -65764,7 +65222,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -65774,7 +65232,7 @@ Bytes to copy - + Asynchron copy device to host @@ -65784,7 +65242,7 @@ Bytes to copy - + Asynchron copy device to host @@ -65794,70 +65252,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: long1 + Type: int2 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -65865,126 +65323,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -65993,7 +65451,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -66002,7 +65460,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -66011,7 +65469,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -66020,7 +65478,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -66028,21 +65486,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -66050,7 +65508,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -66058,21 +65516,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -66080,35 +65538,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -66118,7 +65576,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -66128,7 +65586,7 @@ Bytes to copy - + Asynchron copy device to host @@ -66138,7 +65596,7 @@ Bytes to copy - + Asynchron copy device to host @@ -66148,70 +65606,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: long2 + Type: int3 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -66219,126 +65677,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -66347,7 +65805,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -66356,7 +65814,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -66365,7 +65823,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -66374,7 +65832,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -66382,21 +65840,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -66404,7 +65862,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -66412,21 +65870,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -66434,35 +65892,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -66472,7 +65930,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -66482,7 +65940,7 @@ Bytes to copy - + Asynchron copy device to host @@ -66492,7 +65950,7 @@ Bytes to copy - + Asynchron copy device to host @@ -66502,70 +65960,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: ulong + Type: int4 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -66573,126 +66031,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -66701,7 +66159,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -66710,7 +66168,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -66719,7 +66177,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -66728,7 +66186,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -66736,21 +66194,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -66758,7 +66216,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -66766,21 +66224,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -66788,35 +66246,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -66826,7 +66284,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -66836,7 +66294,7 @@ Bytes to copy - + Asynchron copy device to host @@ -66846,7 +66304,7 @@ Bytes to copy - + Asynchron copy device to host @@ -66856,70 +66314,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: ulong1 + Type: uint - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -66927,126 +66385,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -67055,7 +66513,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -67064,7 +66522,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -67073,7 +66531,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -67082,7 +66540,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -67090,21 +66548,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -67112,7 +66570,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -67120,21 +66578,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -67142,35 +66600,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -67180,7 +66638,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -67190,7 +66648,7 @@ Bytes to copy - + Asynchron copy device to host @@ -67200,7 +66658,7 @@ Bytes to copy - + Asynchron copy device to host @@ -67210,70 +66668,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: ulong2 + Type: uint1 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -67281,126 +66739,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
- + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Synchron copy host to device @@ -67409,7 +66867,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy host to device @@ -67418,7 +66876,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -67427,7 +66885,7 @@ Offset to destination pointer in bytes Bytes to copy - + Synchron copy device to host @@ -67436,7 +66894,7 @@ Offset to destination pointer in bytes Bytes to copy - + Asynchron copy host to 1D Array @@ -67444,21 +66902,21 @@ in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -67466,7 +66924,7 @@ in bytes - + Asynchron copy 1D Array to host @@ -67474,21 +66932,21 @@ bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -67496,35 +66954,35 @@ bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Asynchron Copy host to device @@ -67534,7 +66992,7 @@ Bytes to copy - + Asynchron Copy host to device @@ -67544,7 +67002,7 @@ Bytes to copy - + Asynchron copy device to host @@ -67554,7 +67012,7 @@ Bytes to copy - + Asynchron copy device to host @@ -67564,70 +67022,70 @@ Bytes to copy - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Passes back the flags that were specified when allocating the pinned host buffer - + Enumerator class for CudaPageLockedHostMemory - + - + - + - + - + - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - Type: float + Type: uint2 - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc In elements - + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost In elements - + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! hostPointer won't be freed while disposing. @@ -67635,126 +67093,126 @@ In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Access array per element. index in elements - + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
[ManagedCuda XML documentation hunks omitted: the XML tags were stripped during extraction, leaving only the generated member summaries and hunk headers. These hunks renumber and reorder the auto-generated entries for the CudaPageLockedHostMemory and CudaRegisteredHostMemory per-element-type classes (synchronous and asynchronous host/device and host/1D-array copy overloads, DevicePointer, element indexer, Register/Unregister). The only visible changes are shifted hunk offsets and swapped per-type "Type:" annotations (float1 to uint3, double to long2, cuFloatComplex to float1, and so on) from regenerating the file.]
+ Asynchron copy device to host - must be page size aligned (4KBytes) - In elements + + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + - + - For dispose + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag + Device Pointer - + - Dispose + Passes back the flags that were specified when allocating the pinned host buffer + - + - For IDisposable + Enumerator class for CudaPageLockedHostMemory - - + - Pointer to pinned host memory. + + - + - Size in bytes + - + - Size in elements + - + - Returns register status + - + - Access array per element. + - index in elements - + - Synchron copy host to 1D Array + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + Type: cuFloatReal - - - + - Synchron copy host to 1D Array + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc - + In elements + - + - Synchron copy host to 1D Array + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost - + In elements - + - Synchron copy host to 1D Array + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! + hostPointer won't be freed while disposing. + + + In elements + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Pointer to pinned host memory. + + + + + Size in bytes + + + + + Size in elements + + + + + Access array per element. + + index in elements + + + + + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + + + + + Synchron copy host to 1D Array + + + + + + + Synchron copy host to 1D Array + + + + + + Synchron copy host to 1D Array + + + + + + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + + + Synchron copy host to device + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + Synchron copy host to device + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + Synchron copy device to host + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + Synchron copy device to host + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + Asynchron copy host to 1D Array - + in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + in bytes - + Asynchron copy 1D Array to host - + bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + + + Asynchron Copy host to device + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + + Asynchron Copy host to device + + + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + + Asynchron 
copy device to host + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + + Asynchron copy device to host + + + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + - Page-locks the memory range specified by p and bytesize and maps it - for the device(s) as specified by Flags. This memory range also is added - to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate - calls to functions such as . Since the memory can be accessed - directly by the device, it can be read or written with much higher bandwidth - than pageable memory that has not been registered. Page-locking excessive - amounts of memory may degrade system performance, since it reduces the amount - of memory available to the system for paging. As a result, this function is - best used sparingly to register staging areas for data exchange between - host and device. - The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + Passes back the flags that were specified when allocating the pinned host buffer - + - - - Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + + + Enumerator class for CudaPageLockedHostMemory - + + + + + + + + + + + + + + + + + + + + + + + + + + + + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. - cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for - natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: char3 + Type: dim3 - + - Creates a new CudaRegisteredHostMemory_char3 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemHostAlloc - must be page size aligned (4KBytes) In elements + - + + + Creates a new CudaPageLockedHostMemory and allocates the memory on host. Using cuMemAllocHost + + In elements + + + + Creates a new CudaPageLockedHostMemory from an existing IntPtr. IntPtr must point to page locked memory! + hostPointer won't be freed while disposing. + + + In elements + + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - - - Returns register status - - - + Access array per element. index in elements - + + + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. 
+ + + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + + + Synchron copy host to device + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + Synchron copy host to device + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + Synchron copy device to host + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + Synchron copy device to host + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + Asynchron copy host to 1D Array - + in bytes - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + in bytes - + Asynchron copy 1D Array to host - + bytes - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + bytes - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + + + Asynchron Copy host to device + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + + Asynchron Copy host to device + + + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + + Asynchron copy device to host + + Pointer to device memory + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + + + Asynchron copy device to host + + + Offset to source pointer in bytes + Offset to destination pointer in bytes + Bytes to copy + + + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + - Page-locks the memory range specified by p and bytesize and maps it - for the device(s) as specified by Flags. This memory range also is added - to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate - calls to functions such as . Since the memory can be accessed - directly by the device, it can be read or written with much higher bandwidth - than pageable memory that has not been registered. Page-locking excessive - amounts of memory may degrade system performance, since it reduces the amount - of memory available to the system for paging. As a result, this function is - best used sparingly to register staging areas for data exchange between - host and device. - The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + Passes back the flags that were specified when allocating the pinned host buffer - + - - - Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + + + Enumerator class for CudaPageLockedHostMemory - + + + + + + + + + + + + + + + + + + + + + + + + + + + + A variable located in page locked (pinned) host memory. 
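The page-locked (pinned) wrappers documented above exist so host/device transfers can overlap with kernel execution: only pinned host memory can be copied asynchronously. A minimal C# sketch of that pattern, assuming ManagedCuda's generic CudaPageLockedHostMemory&lt;T&gt;, CudaDeviceVariable&lt;T&gt; and CudaStream wrappers (exact class and method names vary between ManagedCuda versions, so treat them as illustrative rather than as the library's definitive API):

    using ManagedCuda;

    // Sketch only: pinned host buffer + asynchronous copy on a stream.
    // Class/method names are assumptions based on the doc comments above.
    class PinnedCopyExample
    {
        static void Main()
        {
            using var ctx = new CudaContext(0);                         // first CUDA device
            const int n = 1 << 20;

            using var pinned = new CudaPageLockedHostMemory<float>(n);  // cuMemHostAlloc
            using var device = new CudaDeviceVariable<float>(n);        // device buffer
            using var stream = new CudaStream();

            for (int i = 0; i < n; i++)
                pinned[i] = i;                                          // per-element indexer

            // Asynchronous only because the host buffer is page-locked.
            pinned.AsyncCopyToDevice(device, stream.Stream);

            // ... enqueue kernels on the same stream here ...

            stream.Synchronize();                                       // wait for copy + kernels
        }
    }

As the doc comments note, the DevicePointer of such a wrapper (zero-copy access from kernels) is only meaningful when the CudaContext was created with the map-host flag.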
[The remaining hunks touch only the cross-reference numbering in the CudaRegisteredHostMemory_* documentation (byte, uchar1..uchar4, sbyte, char1..char4, short, short1..short4, ushort, ushort1..ushort3, and the following typed variants). The member text itself is unchanged: these wrappers page-lock an existing, natively allocated, 4 KB page-aligned host buffer with cuMemHostRegister (managed C# arrays are not supported; use Marshal.AllocHGlobal or memory from a native DLL), and expose Pointer, SizeInBytes, SizeInElements, a per-element indexer, the register status, synchronous and asynchronous host/device and host/1D-array copy methods, the mapped DevicePointer, and Register/Unregister. Registering page-locks the range and adds it to the same tracking mechanism as cuMemHostAlloc; because page-locking reduces the memory available for paging, it should be used sparingly for staging buffers, and the range must be unregistered afterwards.]
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -79129,21 +79514,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -79151,7 +79536,7 @@ - + Asynchron copy 1D Array to host @@ -79159,21 +79544,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -79181,41 +79566,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -79228,151 +79613,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: uint3 + Type: ushort4 - + - Creates a new CudaRegisteredHostMemory_uint3 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_ushort4 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -79380,21 +79765,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -79402,7 +79787,7 @@ - + Asynchron copy 1D Array to host @@ -79410,21 +79795,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -79432,41 +79817,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -79479,151 +79864,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: uint4 + Type: int - + - Creates a new CudaRegisteredHostMemory_uint4 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_int from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -79631,21 +80016,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -79653,7 +80038,7 @@ - + Asynchron copy 1D Array to host @@ -79661,21 +80046,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -79683,41 +80068,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -79730,151 +80115,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: long + Type: int1 - + - Creates a new CudaRegisteredHostMemory_long from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_int1 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -79882,21 +80267,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -79904,7 +80289,7 @@ - + Asynchron copy 1D Array to host @@ -79912,21 +80297,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -79934,41 +80319,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -79981,151 +80366,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: long1 + Type: int2 - + - Creates a new CudaRegisteredHostMemory_long1 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_int2 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -80133,21 +80518,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -80155,7 +80540,7 @@ - + Asynchron copy 1D Array to host @@ -80163,21 +80548,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -80185,41 +80570,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -80232,151 +80617,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: long2 + Type: int3 - + - Creates a new CudaRegisteredHostMemory_long2 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_int3 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -80384,21 +80769,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -80406,7 +80791,7 @@ - + Asynchron copy 1D Array to host @@ -80414,21 +80799,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -80436,41 +80821,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -80483,151 +80868,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: ulong + Type: int4 - + - Creates a new CudaRegisteredHostMemory_ulong from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_int4 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -80635,21 +81020,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -80657,7 +81042,7 @@ - + Asynchron copy 1D Array to host @@ -80665,21 +81050,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -80687,41 +81072,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -80734,151 +81119,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: ulong1 + Type: uint - + - Creates a new CudaRegisteredHostMemory_ulong1 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_uint from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -80886,21 +81271,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -80908,7 +81293,7 @@ - + Asynchron copy 1D Array to host @@ -80916,21 +81301,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -80938,41 +81323,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -80985,151 +81370,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: ulong2 + Type: uint1 - + - Creates a new CudaRegisteredHostMemory_ulong2 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_uint1 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -81137,21 +81522,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -81159,7 +81544,7 @@ - + Asynchron copy 1D Array to host @@ -81167,21 +81552,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -81189,41 +81574,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -81236,151 +81621,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: float + Type: uint2 - + - Creates a new CudaRegisteredHostMemory_float from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_uint2 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -81388,21 +81773,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -81410,7 +81795,7 @@ - + Asynchron copy 1D Array to host @@ -81418,21 +81803,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -81440,41 +81825,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -81487,151 +81872,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: float1 + Type: uint3 - + - Creates a new CudaRegisteredHostMemory_float1 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_uint3 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -81639,21 +82024,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -81661,7 +82046,7 @@ - + Asynchron copy 1D Array to host @@ -81669,21 +82054,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -81691,41 +82076,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -81738,151 +82123,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: float2 + Type: uint4 - + - Creates a new CudaRegisteredHostMemory_float2 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_uint4 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -81890,21 +82275,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -81912,7 +82297,7 @@ - + Asynchron copy 1D Array to host @@ -81920,21 +82305,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -81942,41 +82327,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -81989,151 +82374,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: float3 + Type: long - + - Creates a new CudaRegisteredHostMemory_float3 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_long from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -82141,21 +82526,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -82163,7 +82548,7 @@ - + Asynchron copy 1D Array to host @@ -82171,21 +82556,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -82193,41 +82578,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -82240,151 +82625,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: float4 + Type: long1 - + - Creates a new CudaRegisteredHostMemory_float4 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_long1 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -82392,21 +82777,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -82414,7 +82799,7 @@ - + Asynchron copy 1D Array to host @@ -82422,21 +82807,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -82444,41 +82829,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -82491,151 +82876,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: double + Type: long2 - + - Creates a new CudaRegisteredHostMemory_double from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_long2 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -82643,21 +83028,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -82665,7 +83050,7 @@ - + Asynchron copy 1D Array to host @@ -82673,21 +83058,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -82695,41 +83080,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -82742,151 +83127,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: double1 + Type: ulong - + - Creates a new CudaRegisteredHostMemory_double1 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_ulong from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -82894,21 +83279,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -82916,7 +83301,7 @@ - + Asynchron copy 1D Array to host @@ -82924,21 +83309,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -82946,41 +83331,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -82993,151 +83378,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: double2 + Type: ulong1 - + - Creates a new CudaRegisteredHostMemory_double2 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_ulong1 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -83145,21 +83530,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -83167,7 +83552,7 @@ - + Asynchron copy 1D Array to host @@ -83175,21 +83560,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -83197,41 +83582,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -83244,151 +83629,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: cuDoubleComplex + Type: ulong2 - + - Creates a new CudaRegisteredHostMemory_cuDoubleComplex from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_ulong2 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -83396,21 +83781,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -83418,7 +83803,7 @@ - + Asynchron copy 1D Array to host @@ -83426,21 +83811,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -83448,41 +83833,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -83495,151 +83880,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: cuDoubleReal + Type: float - + - Creates a new CudaRegisteredHostMemory_cuDoubleReal from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_float from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -83647,21 +84032,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -83669,7 +84054,7 @@ - + Asynchron copy 1D Array to host @@ -83677,21 +84062,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -83699,41 +84084,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -83746,151 +84131,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: cuFloatComplex + Type: float1 - + - Creates a new CudaRegisteredHostMemory_cuFloatComplex from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_float1 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -83898,21 +84283,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -83920,7 +84305,7 @@ - + Asynchron copy 1D Array to host @@ -83928,21 +84313,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -83950,41 +84335,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -83997,151 +84382,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: cuFloatReal + Type: float2 - + - Creates a new CudaRegisteredHostMemory_cuFloatReal from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_float2 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -84149,21 +84534,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -84171,7 +84556,7 @@ - + Asynchron copy 1D Array to host @@ -84179,21 +84564,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -84201,41 +84586,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -84248,151 +84633,151 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + The base address must be the same one specified to . - + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for natively allocated memory (Marshal.AllocHGlobal, or a native dll). - Type: dim3 + Type: float3 - + - Creates a new CudaRegisteredHostMemory_dim3 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + Creates a new CudaRegisteredHostMemory_float3 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! must be page size aligned (4KBytes) In elements - + For dispose - + Dispose - + For IDisposable - + Pointer to pinned host memory. - + Size in bytes - + Size in elements - + Returns register status - + Access array per element. 
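The repeated summaries above all describe the same usage pattern: a CudaRegisteredHostMemory_<T> instance wraps natively allocated, page-aligned host memory, page-locks it, and is then used for asynchronous copies before being unregistered. Below is a minimal sketch of that pattern, assuming ManagedCuda's CudaRegisteredHostMemory_float with Register/Unregister and AsyncCopyToDevice members named after the summaries in this diff; the exact signatures are not shown here, so treat them as assumptions.

```csharp
using System;
using System.Runtime.InteropServices;
using ManagedCuda;
using ManagedCuda.BasicTypes;

class PinnedHostCopySketch
{
    static void Main()
    {
        const int n = 1024;
        var ctx = new CudaContext();          // default device context
        var stream = new CudaStream();        // stream used for the async copy

        // cuMemHostRegister does not accept managed C# arrays, so the buffer
        // must come from native allocation and be aligned to the 4 KB page size.
        IntPtr raw = Marshal.AllocHGlobal(n * sizeof(float) + 4096);
        IntPtr aligned = new IntPtr((raw.ToInt64() + 4095) & ~4095L);

        // Hypothetical wrapper usage based on the member summaries above:
        // Register page-locks the range, Unregister makes it pageable again.
        var pinned = new CudaRegisteredHostMemory_float(aligned, n);
        pinned.Register(CUMemHostRegisterFlags.None);

        var dev = new CudaDeviceVariable<float>(n);
        pinned.AsyncCopyToDevice(dev, stream.Stream);   // asynchronous host-to-device copy
        stream.Synchronize();

        pinned.Unregister();                  // must target the same base address that was registered
        Marshal.FreeHGlobal(raw);
        dev.Dispose();
        stream.Dispose();
        ctx.Dispose();
    }
}
```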
index in elements - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy host to 1D Array - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy 1D Array to host - + Synchron copy host to device - + Synchron copy host to device - + Synchron copy device to host - + Synchron copy device to host - + Asynchron copy host to 1D Array @@ -84400,21 +84785,21 @@ - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array - + Asynchron copy host to 1D Array @@ -84422,7 +84807,7 @@ - + Asynchron copy 1D Array to host @@ -84430,21 +84815,21 @@ - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host - + Asynchron copy 1D Array to host @@ -84452,41 +84837,41 @@ - + Asynchron Copy host to device - + Asynchron Copy host to device - + Asynchron copy device to host - + Asynchron copy device to host - + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag Device Pointer - + Page-locks the memory range specified by p and bytesize and maps it for the device(s) as specified by Flags. This memory range also is added @@ -84499,6076 +84884,8031 @@ best used sparingly to register staging areas for data exchange between host and device. The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + The memory page-locked by this function must be unregistered with - + Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . - - - - - Cuda Surface Object - - - - - Creates a surface object. ResDesc describes - the data to perform surface load/stores on. ResDesc.resType must be - and ResDesc.hArray - must be set to a valid CUDA array handle. ResDesc.flags must be set to zero. - - CudaResourceDesc - - - - Creates a surface object. ResDesc describes - the data to perform surface load/stores on. ResDesc.resType must be - and ResDesc.hArray - must be set to a valid CUDA array handle. + The base address must be the same one specified to . - CudaArray1D - + - Creates a surface object. ResDesc describes - the data to perform surface load/stores on. ResDesc.resType must be - and ResDesc.hArray - must be set to a valid CUDA array handle. + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for + natively allocated memory (Marshal.AllocHGlobal, or a native dll). + Type: float4 - CudaArray2D - + - Creates a surface object. ResDesc describes - the data to perform surface load/stores on. ResDesc.resType must be - and ResDesc.hArray - must be set to a valid CUDA array handle. + Creates a new CudaRegisteredHostMemory_float4 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! - CudaArray3D + must be page size aligned (4KBytes) + In elements - + For dispose - + Dispose - + For IDisposable - + - Returns the wrapped CUsurfObject + Pointer to pinned host memory. - + - Returns the CudaResourceDesc used to create the CudaSurfObject + Size in bytes - + - Cuda Texure Object + Size in elements - + - Creates a texture object and returns it in pTexObject. pResDesc describes the data to texture from. pTexDesc - describes how the data should be sampled. 
+ Returns register status - CudaResourceDesc - CudaTextureDescriptor - + - Creates a texture object. ResDesc describes the data to texture from. TexDesc - describes how the data should be sampled. resViewDesc is an optional argument that specifies an alternate format - for the data described by pResDesc, and also describes the subresource region to restrict access to when texturing. - pResViewDesc can only be specified if the type of resource is a CUDA array or a CUDA mipmapped array. + Access array per element. - Describes the data to texture from. - Describes how the data should be sampled. - CudaResourceViewDesc. Only valid if type of resource is a CUDA array or a CUDA mipmapped array + index in elements + - + - For dispose + Synchron copy host to 1D Array + + - + - Dispose + Synchron copy host to 1D Array + - + - For IDisposable + Synchron copy host to 1D Array - + - + - Returns the wrapped CUtexObject + Synchron copy host to 1D Array + + - + - Returns the CudaResourceDesc used to create the CudaTexObject + Synchron copy 1D Array to host + + - + - Returns the CudaTextureDescriptor used to create the CudaTexObject + Synchron copy 1D Array to host + - + - Returns the CudaResourceViewDesc used to create the CudaTexObject + Synchron copy 1D Array to host + - + - Provides methods to bind texture references to kernels + Synchron copy 1D Array to host + + - + - Create a new CudaDeviceVariable and bind it to a texture reference. + Synchron copy host to device - - - - - - In elements + - + - Bind a CudaDeviceVariable to a texture reference. + Synchron copy host to device - - - - - - + - + - Create a new CudaPitchedDeviceVariable and bind it to a texture reference. + Synchron copy device to host - - - - - - - In elements - In elements + - + - Create a new CudaPitchedDeviceVariable and bind it to a texture reference. + Synchron copy device to host - - - - - - - - In elements - In elements + - + - Bind a CudaPitchedDeviceVariable to a texture reference. + Asynchron copy host to 1D Array - - - - - - - + + + - + - Bind a CudaPitchedDeviceVariable to a texture reference. + Asynchron copy host to 1D Array - - - - - - - - + + - + - Create a new CudaArray1D and bind it to a texture reference. + Asynchron copy host to 1D Array - - - - - - - In elements - + + - + - Bind a CudaArray1D to a texture reference. + Asynchron copy host to 1D Array - - - - - + + - + - Create a new CudaArray2D and bind it to a texture reference. + Asynchron copy 1D Array to host - - - - - - - In elements - In elements - 1,2 or 4 + + + - + - Create a new CudaArray2D and bind it to a texture reference. + Asynchron copy 1D Array to host - - - - - - - - In elements - In elements - 1,2 or 4 + + - + - Bind a CudaArray2D to a texture reference. + Asynchron copy 1D Array to host - - - - - + - + - Bind a CudaArray2D to a texture reference. + Asynchron copy 1D Array to host - - - - - - + + - + - Create a new CudaArray3D and bind it to a texture reference. + Asynchron Copy host to device - - - - - - - In elements - In elements - In elements - 1,2 or 4 + + - + - Create a new CudaArray3D and bind it to a texture reference. + Asynchron Copy host to device - - - - - - - - - In elements - In elements - In elements - 1,2 or 4 + + - + - Bind a CudaArray3D to a texture reference. + Asynchron copy device to host - - - - - - + + - + - Bind a CudaArray3D to a texture reference. + Asynchron copy device to host - - - - - - - - + + - + - Create a new CudaMipmappedArray and bind it to a texture reference. 
+ Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag - - - - - - - - - - - - + Device Pointer - + - Create a new CudaMipmappedArray and bind it to a texture reference. + Page-locks the memory range specified by p and bytesize and maps it + for the device(s) as specified by Flags. This memory range also is added + to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate + calls to functions such as . Since the memory can be accessed + directly by the device, it can be read or written with much higher bandwidth + than pageable memory that has not been registered. Page-locking excessive + amounts of memory may degrade system performance, since it reduces the amount + of memory available to the system for paging. As a result, this function is + best used sparingly to register staging areas for data exchange between + host and device. + The pointer p and size bytesize must be aligned to the host page size (4 KB). + The memory page-locked by this function must be unregistered with - - - - - - - - - - - - - - - - Bind a CudaMipmappedArray to a texture reference. + + + Unmaps the memory range whose base address is specified by p, and makes it pageable again. + The base address must be the same one specified to . - - - - - - - - - - - - + - Bind a CudaMipmappedArray to a texture reference. + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for + natively allocated memory (Marshal.AllocHGlobal, or a native dll). + Type: double - - - - - - - - - - - - - - + - Create a new CudaDeviceVariable and bind it to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + Creates a new CudaRegisteredHostMemory_double from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! - - - RGBA color - - + must be page size aligned (4KBytes) In elements - + - Bind a CudaDeviceVariable to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + For dispose - - - - - - RGBA color - + - Create a new CudaPitchedDeviceVariable and bind it to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. 
The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + Dispose - - - - - - In elements - In elements - RGBA color - + - Bind a CudaPitchedDeviceVariable to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + For IDisposable - - - - - - - RGBA color + - + - Create a new CudaArray1D and bind it to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + Pointer to pinned host memory. - - - - - - In elements - - RGBA color - + - Bind a CudaArray1D to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + Size in bytes - - - - - - RGBA color - + - Create a new CudaArray2D and bind it to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + Size in elements - - - - - - In elements - In elements - 1,2 or 4 - RGBA color - + - Bind a CudaArray2D to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + Returns register status - - - - - - RGBA color - + - Create a new CudaArray3D and bind it to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. 
The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + Access array per element. - - - - - - In elements - In elements - In elements - 1,2 or 4 - RGBA color + index in elements + - + - Bind a CudaArray3D to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + Synchron copy host to 1D Array - - - - - - RGBA color + + - + - Create a new CudaMipmappedArray and bind it to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + Synchron copy host to 1D Array - - - - - - - - - - - - RGBA color + - + - Bind a CudaMipmappedArray to a texture reference. - Sets the border color for the texture reference - Specifies the value of the RGBA color via the \p pBorderColor to the texture reference - \p hTexRef. The color value supports only float type and holds color components in - the following sequence: - pBorderColor[0] holds 'R' component - pBorderColor[1] holds 'G' component - pBorderColor[2] holds 'B' component - pBorderColor[3] holds 'A' component - addressMode is set to CU_TR_ADDRESS_MODE_BORDER + Synchron copy host to 1D Array - - - - - - - - - - RGBA color - + - CudaArrayTexture1D + Synchron copy host to 1D Array + + - + - Creates a new 1D texture from array memory. Allocates new array. + Synchron copy 1D Array to host - - - - - - - In elements - + + - + - Creates a new 1D texture from array memory + Synchron copy 1D Array to host - - - - - - + - + - For dispose + Synchron copy 1D Array to host + - + - Dispose + Synchron copy 1D Array to host + + - + - For IDisposable + Synchron copy host to device - + - + - TextureReference + Synchron copy host to device + - + - Flags + Synchron copy device to host + - + - AddressMode + Synchron copy device to host + - + - Format + Asynchron copy host to 1D Array + + + - + - Format + Asynchron copy host to 1D Array + + - + - Size + Asynchron copy host to 1D Array + + - + - ChannelSize + Asynchron copy host to 1D Array + + + - + - TotalSizeInBytes + Asynchron copy 1D Array to host + + + - + - NumChannels + Asynchron copy 1D Array to host + + - + - Name + Asynchron copy 1D Array to host + + - + - Module + Asynchron copy 1D Array to host + + + - + - CUFuntion + Asynchron Copy host to device + + - + - Array + Asynchron Copy host to device + + - + - CudaArrayTexture2D + Asynchron copy device to host + + - + - Creates a new 2D texture from array memory. Allocates a new 2D array. 
+ Asynchron copy device to host - - - - - - - In elements - In elements - 1,2 or 4 + + - + - Creates a new 2D texture from array memory. Allocates a new 2D array. + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag - - - - - - - - In elements - In elements - 1,2 or 4 + Device Pointer - + - Creates a new 2D texture from array memory + Page-locks the memory range specified by p and bytesize and maps it + for the device(s) as specified by Flags. This memory range also is added + to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate + calls to functions such as . Since the memory can be accessed + directly by the device, it can be read or written with much higher bandwidth + than pageable memory that has not been registered. Page-locking excessive + amounts of memory may degrade system performance, since it reduces the amount + of memory available to the system for paging. As a result, this function is + best used sparingly to register staging areas for data exchange between + host and device. + The pointer p and size bytesize must be aligned to the host page size (4 KB). + The memory page-locked by this function must be unregistered with - - - - - - + + + Unmaps the memory range whose base address is specified by p, and makes it pageable again. + The base address must be the same one specified to . + + + - Creates a new 2D texture from array memory + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for + natively allocated memory (Marshal.AllocHGlobal, or a native dll). + Type: double1 - - - - - - - - + + + Creates a new CudaRegisteredHostMemory_double1 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + + must be page size aligned (4KBytes) + In elements + + For dispose - + Dispose - + For IDisposable - + - TextureReference + Pointer to pinned host memory. - + - Flags + Size in bytes - + - AddressMode + Size in elements - + - AddressMode + Returns register status - + - Format + Access array per element. + index in elements + - + - Format + Synchron copy host to 1D Array + + - + - Height + Synchron copy host to 1D Array + - + - Width + Synchron copy host to 1D Array + - + - ChannelSize + Synchron copy host to 1D Array + + - + - TotalSizeInBytes + Synchron copy 1D Array to host + + - + - NumChannels + Synchron copy 1D Array to host + - + - Name + Synchron copy 1D Array to host + - + - Module + Synchron copy 1D Array to host + + - + - CUFuntion + Synchron copy host to device + - + - Array + Synchron copy host to device + - + - CudaArrayTexture3D + Synchron copy device to host + - + - Creates a new 3D texture from array memory. Allocates a new 3D array. + Synchron copy device to host - - - - - - - In elements - In elements - In elements - 1,2 or 4 + - + - Creates a new 3D texture from array memory. Allocates a new 3D array. 
+ Asynchron copy host to 1D Array - - - - - - - - - In elements - In elements - In elements - 1,2 or 4 + + + - + - Creates a new 3D texture from array memory + Asynchron copy host to 1D Array - - - - - - + + - + - Creates a new 3D texture from array memory + Asynchron copy host to 1D Array - - - - - - - + - + - For dispose + Asynchron copy host to 1D Array + + + - + - Dispose + Asynchron copy 1D Array to host + + + - + - For IDisposable + Asynchron copy 1D Array to host - + + - + - TextureReference + Asynchron copy 1D Array to host + + - + - Flags + Asynchron copy 1D Array to host + + + - + - AddressMode + Asynchron Copy host to device + + - + - AddressMode + Asynchron Copy host to device + + - + - AddressMode + Asynchron copy device to host + + - + - Format + Asynchron copy device to host + + - + - Filtermode + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag + Device Pointer - + - Depth + Page-locks the memory range specified by p and bytesize and maps it + for the device(s) as specified by Flags. This memory range also is added + to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate + calls to functions such as . Since the memory can be accessed + directly by the device, it can be read or written with much higher bandwidth + than pageable memory that has not been registered. Page-locking excessive + amounts of memory may degrade system performance, since it reduces the amount + of memory available to the system for paging. As a result, this function is + best used sparingly to register staging areas for data exchange between + host and device. + The pointer p and size bytesize must be aligned to the host page size (4 KB). + The memory page-locked by this function must be unregistered with + - - - Height + + + Unmaps the memory range whose base address is specified by p, and makes it pageable again. + The base address must be the same one specified to . - + - Width + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for + natively allocated memory (Marshal.AllocHGlobal, or a native dll). + Type: double2 - + - ChannelSize + Creates a new CudaRegisteredHostMemory_double2 from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + must be page size aligned (4KBytes) + In elements - + - TotalSizeInBytes + For dispose - + - NumChannels + Dispose - + - Name + For IDisposable + - + - Module + Pointer to pinned host memory. - + - CUFuntion + Size in bytes - + - Array + Size in elements - + - A variable located in CUDA device memory + Returns register status - variable base type - + - Creates a new CudaDeviceVariable and allocates the memory on the device + Access array per element. - In elements + index in elements + - + - Creates a new CudaDeviceVariable from an existing CUdeviceptr. The allocated size is gethered via the CUDA API. - devPtr won't be freed while disposing. + Synchron copy host to 1D Array - + + - + - Creates a new CudaDeviceVariable from an existing CUdeviceptr. The allocated size is gethered via the CUDA API. + Synchron copy host to 1D Array - - The CUdeviceptr will be freed while disposing, if the CudaDeviceVariable is the owner + - + - Creates a new CudaDeviceVariable from an existing CUdeviceptr. - devPtr won't be freed while disposing. 
+ Synchron copy host to 1D Array - - Size in Bytes + - + - Creates a new CudaDeviceVariable from an existing CUdeviceptr. + Synchron copy host to 1D Array - - The CUdeviceptr will be freed while disposing, if the CudaDeviceVariable is the owner - Size in Bytes + + - + - Creates a new CudaDeviceVariable from definition in cu-file. + Synchron copy 1D Array to host - The module where the variable is defined in. - The variable name as defined in the cu-file. + + - + - Creates a new CudaDeviceVariable from definition in cu-file. + Synchron copy 1D Array to host - The kernel which module defines the variable. - The variable name as defined in the cu-file. + - + - For dispose + Synchron copy 1D Array to host + - + - Dispose + Synchron copy 1D Array to host + + - + - For IDisposable + Synchron copy host to device - + - + - Copy data from device to device memory + Synchron copy host to device - Source pointer to device memory + - + - Copy data from device to device memory + Synchron copy device to host - Source pointer to device memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Size to copy in bytes + - + - Copy data from device to device memory + Synchron copy device to host - Source + - + - Copy data from device to device memory + Asynchron copy host to 1D Array - Source - Offset to source pointer in bytes - Offset to destination pointer in bytes - Size to copy in bytes + + + - + - Copy from device to device memory + Asynchron copy host to 1D Array - Source + + - + - Copy from device to device memory + Asynchron copy host to 1D Array - Source - Offset to source pointer in bytes - Offset to destination pointer in bytes - Width of 2D memory to copy in bytes - Height in elements + + - + - Copy data from host to device memory + Asynchron copy host to 1D Array - Source pointer to host memory + + + - + - Copy data from host to device memory + Asynchron copy 1D Array to host - Source pointer to host memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Size to copy in bytes + + + - + - Copy data from host to device memory + Asynchron copy 1D Array to host - Source pointer to host memory + + - + - Copy data from host to device memory + Asynchron copy 1D Array to host - Source pointer to host memory - Offset to destination pointer in bytes + + - + - Copy data from host to device memory + Asynchron copy 1D Array to host - Source pointer to host memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Size to copy in bytes + + + - + - Copy data from host to device memory + Asynchron Copy host to device - Source pointer to host memory - Offset to destination pointer in bytes + + - + - Copy data from host to device memory + Asynchron Copy host to device - Source pointer to host memory + + - + - Copy from Host to device memory. Array elements can be of any (value)type, but total size in bytes must match to allocated device memory. + Asynchron copy device to host - Source + + - + - Copy data from device to host memory + Asynchron copy device to host - Destination pointer to host memory + + - + - Copy data from device to host memory + Returns the CUdeviceptr for pinned host memory mapped to device memory space. 
Only valid if context is created with flag - Destination pointer to host memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Size to copy in bytes + Device Pointer - + - Copy data from device to host memory + Page-locks the memory range specified by p and bytesize and maps it + for the device(s) as specified by Flags. This memory range also is added + to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate + calls to functions such as . Since the memory can be accessed + directly by the device, it can be read or written with much higher bandwidth + than pageable memory that has not been registered. Page-locking excessive + amounts of memory may degrade system performance, since it reduces the amount + of memory available to the system for paging. As a result, this function is + best used sparingly to register staging areas for data exchange between + host and device. + The pointer p and size bytesize must be aligned to the host page size (4 KB). + The memory page-locked by this function must be unregistered with - Destination data in host memory + - + + + Unmaps the memory range whose base address is specified by p, and makes it pageable again. + The base address must be the same one specified to . + + + - Copy data from device to host memory + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for + natively allocated memory (Marshal.AllocHGlobal, or a native dll). + Type: cuDoubleComplex - Destination data in host memory - Offset to source pointer in bytes - + - Copy data from device to host memory + Creates a new CudaRegisteredHostMemory_cuDoubleComplex from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! - Destination pointer to host memory + must be page size aligned (4KBytes) + In elements - + - Copy data from device to host memory + For dispose - Destination data in host memory - Offset to source pointer in bytes - + - Copy data from device to host memory + Dispose - Destination pointer to host memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Size to copy in bytes - + - Copy data from this device to host memory. Array elements can be of any (value)type, but total size in bytes must match to allocated device memory. + For IDisposable - Destination + - + - Async Copy data from device to device memory + Pointer to pinned host memory. - Source pointer to device memory - - + - Async Copy data from device to device memory + Size in bytes - Source - - + - Async Copy from device to device memory + Size in elements - Source - - + - Async Copy data from device to device memory + Returns register status - Source pointer to device memory - - + - Async Copy data from device to device memory + Access array per element. 
- Source - + index in elements + - + - Async Copy from device to device memory + Synchron copy host to 1D Array - Source - + + - + - Async Copy data from device to device memory + Synchron copy host to 1D Array - Source pointer to device memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Size to copy in bytes - + - + - Async Copy data from device to device memory + Synchron copy host to 1D Array - Source - Offset to source pointer in bytes - Offset to destination pointer in bytes - Size to copy in bytes - + - + - Async Copy data from device to device memory + Synchron copy host to 1D Array - Source pointer to device memory - Offset to source pointer in bytes - Offset to destination pointer in bytes - Size to copy in bytes - + + - + - Async Copy data from device to device memory + Synchron copy 1D Array to host - Source - Offset to source pointer in bytes - Offset to destination pointer in bytes - Size to copy in bytes - + + - + - Async Copy from device to device memory + Synchron copy 1D Array to host - Source - Offset to source pointer in bytes - Offset to destination pointer in bytes - Width of 2D memory to copy in bytes - Height in elements - + - + - Async Copy from device to device memory + Synchron copy 1D Array to host - Source - Offset to source pointer in bytes - Offset to destination pointer in bytes - Width of 2D memory to copy in bytes - Height in elements - + - + - Memset + Synchron copy 1D Array to host - + + - + - Memset + Synchron copy host to device - + - + - Memset + Synchron copy host to device - + - + - Memset + Synchron copy device to host - - + - + - Memset + Synchron copy device to host - - + - + - Memset + Asynchron copy host to 1D Array - + + - + - Copies from device memory in one context to device memory in another context + Asynchron copy host to 1D Array - Destination context - Source pointer to device memory - Source context + + - + - Copies from device memory in one context to device memory in another context + Asynchron copy host to 1D Array - Destination context - Source pointer to device memory - Source context + + - + - Async-Copies from device memory in one context to device memory in another context + Asynchron copy host to 1D Array - Destination context - Source pointer to device memory - Source context + + - + - Async-Copies from device memory in one context to device memory in another context + Asynchron copy 1D Array to host - Destination context - Source pointer to device memory - Source context + + - + - Access array elements directly from host. - Each single access invokes a device to host or host to device copy. Access is therefor rather slow. + Asynchron copy 1D Array to host - index in elements - + + - + - Device pointer + Asynchron copy 1D Array to host + + - + - Size in bytes + Asynchron copy 1D Array to host + + + - + - Type size in bytes + Asynchron Copy host to device + + - + - Size in elements + Asynchron Copy host to device + + - + - If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + Asynchron copy device to host + + - + - Converts a device variable to a host array + Asynchron copy device to host - device variable - newly allocated host array with values from device memory + + - + - Converts a device variable to a host value. In case of multiple device values, only the first value is copied. + Returns the CUdeviceptr for pinned host memory mapped to device memory space. 
Only valid if context is created with flag - device variable - newly allocated host variable with value from device memory + Device Pointer - + - Converts a host array to a newly allocated device variable. + Page-locks the memory range specified by p and bytesize and maps it + for the device(s) as specified by Flags. This memory range also is added + to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate + calls to functions such as . Since the memory can be accessed + directly by the device, it can be read or written with much higher bandwidth + than pageable memory that has not been registered. Page-locking excessive + amounts of memory may degrade system performance, since it reduces the amount + of memory available to the system for paging. As a result, this function is + best used sparingly to register staging areas for data exchange between + host and device. + The pointer p and size bytesize must be aligned to the host page size (4 KB). + The memory page-locked by this function must be unregistered with - host array - newly allocated device variable with values from host memory + - - - Converts a host array to a newly allocated device variable. + + + Unmaps the memory range whose base address is specified by p, and makes it pageable again. + The base address must be the same one specified to . - host array - newly allocated device variable with values from host memory - + - Gets a null-pointer equivalent + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for + natively allocated memory (Marshal.AllocHGlobal, or a native dll). + Type: cuDoubleReal - + - A CUDA exception is thrown if a CUDA Driver API method call does not return + Creates a new CudaRegisteredHostMemory_cuDoubleReal from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + must be page size aligned (4KBytes) + In elements - + - + For dispose - + - + Dispose - - - + - + For IDisposable - + - + - + Pointer to pinned host memory. - - + - + Size in bytes - - - + - + Size in elements - - - - + - + Returns register status - - + - + Access array per element. - - + index in elements + - + - + Synchron copy host to 1D Array + + - + - Error name as returned by CUDA driver API + Synchron copy host to 1D Array + - + - Error description as returned by CUDA driver API + Synchron copy host to 1D Array + - + - Groupes several wrapped CUgraphicsResources together, so that the map() call to the CUDA API can be efficiently on all - resources together. + Synchron copy host to 1D Array + + - + - Creates a new CudaGraphicsInteropResourceCollection + Synchron copy 1D Array to host + + - + - For dispose + Synchron copy 1D Array to host + - + - Returns the number of resources in the collection + Synchron copy 1D Array to host + - + - Adds a new resource to the collection + Synchron copy 1D Array to host - + + - + - Removes all resources in the collection, an disposes every element. + Synchron copy host to device + - + - Returns true, if the given resource is part of the collection + Synchron copy host to device - - + - + - Throws NotImplementedException. + Synchron copy device to host - - + - + - Removes a resource from the collection. The resource is not disposed. 
+ Synchron copy device to host - - + - + - Dispose + Asynchron copy host to 1D Array + + + - + - For IDisposable + Asynchron copy host to 1D Array - + + - + - Returns the ICudaGraphicsInteropResource at index index. + Asynchron copy host to 1D Array - - + + - + - Maps all graphics resources for access by CUDA. - The resources may be accessed by CUDA until they are unmapped. The graphics API from which the resource - was registered should not access any resources while they are mapped by CUDA. If an application does - so, the results are undefined. - This function provides the synchronization guarantee that any graphics calls issued before - will complete before any subsequent CUDA work issued in stream begins. - If any of the resources is presently mapped for access by CUDA then exception is thrown. + Asynchron copy host to 1D Array + + + - + - Maps all graphics resources for access by CUDA. - The resources may be accessed by CUDA until they are unmapped. The graphics API from which the resource - was registered should not access any resources while they are mapped by CUDA. If an application does - so, the results are undefined. - This function provides the synchronization guarantee that any graphics calls issued before - will complete before any subsequent CUDA work issued in stream begins. - If any of the resources is presently mapped for access by CUDA then exception is thrown. + Asynchron copy 1D Array to host + + - + - Maps all graphics resources for access by CUDA. - The resources may be accessed by CUDA until they are unmapped. The graphics API from which the resource - was registered should not access any resources while they are mapped by CUDA. If an application does - so, the results are undefined. - This function provides the synchronization guarantee that any graphics calls issued before - will complete before any subsequent CUDA work issued in stream begins. - If any of the resources is presently mapped for access by CUDA then exception is thrown. + Asynchron copy 1D Array to host + + - + - Unmaps all graphics resources. - Once unmapped, the resources may not be accessed by CUDA until they are mapped again. - This function provides the synchronization guarantee that any CUDA work issued in stream before - will complete before any subsequently issued graphics work begins. - If any of the resources are not presently mapped for access by CUDA then exception is thrown. + Asynchron copy 1D Array to host + - + - Helper methods used in the wrapper framework + Asynchron copy 1D Array to host + + + - + - Returns the number of channels used in textures depending on the given type. + Asynchron Copy host to device - Type - Number of channels + + - + - Returns the channel size of an CUDA array in bytes. + Asynchron Copy host to device - Channel format - Size in bytes + + - + - CudaLinearTexture2D + Asynchron copy device to host + + - + - Creates a new 2D texture from linear memory. Allocates a new device variable + Asynchron copy device to host - - - - - - - In elements - In elements + + - + - Creates a new 2D texture from linear memory. Allocates a new device variable + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag - - - - - - - - In elements - In elements + Device Pointer - + - Creates a new 2D texture from linear memory. + Page-locks the memory range specified by p and bytesize and maps it + for the device(s) as specified by Flags. 
This memory range also is added + to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate + calls to functions such as . Since the memory can be accessed + directly by the device, it can be read or written with much higher bandwidth + than pageable memory that has not been registered. Page-locking excessive + amounts of memory may degrade system performance, since it reduces the amount + of memory available to the system for paging. As a result, this function is + best used sparingly to register staging areas for data exchange between + host and device. + The pointer p and size bytesize must be aligned to the host page size (4 KB). + The memory page-locked by this function must be unregistered with - - - - - - - + + + Unmaps the memory range whose base address is specified by p, and makes it pageable again. + The base address must be the same one specified to . + + + - Creates a new 2D texture from linear memory. + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for + natively allocated memory (Marshal.AllocHGlobal, or a native dll). + Type: cuFloatComplex - - - - - - - - - + + + Creates a new CudaRegisteredHostMemory_cuFloatComplex from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! + + must be page size aligned (4KBytes) + In elements + + For dispose - + Dispose - + For IDisposable - + - TextureReference + Pointer to pinned host memory. - + - Flags + Size in bytes - + - AddressMode + Size in elements - + - AddressMode + Returns register status - + - Format + Access array per element. + index in elements + - + - Format + Synchron copy host to 1D Array + + - + - Height + Synchron copy host to 1D Array + - + - Width + Synchron copy host to 1D Array + - + - ChannelSize + Synchron copy host to 1D Array + + - + - TotalSizeInBytes + Synchron copy 1D Array to host + + - + - NumChannels + Synchron copy 1D Array to host + - + - Name + Synchron copy 1D Array to host + - + - Module + Synchron copy 1D Array to host + + - + - CUFunction + Synchron copy host to device + - + - Device variable in linear Memory + Synchron copy host to device + - + - Binds a linear address range to the texture reference. - Any previous address or CUDA array state associated with the texture reference is superseded by this function. - Any memory previously bound to the texture reference is unbound. - Size my differ to the previous bound variable, but type must be the same. + Synchron copy device to host - New device variable to bind this texture reference to. + - + - A variable located in CUDA device memory. The data is aligned following + Synchron copy device to host - variable base type + - + - Creates a new CudaPitchedDeviceVariable and allocates the memory on the device + Asynchron copy host to 1D Array - In elements - In elements + + + - + - Creates a new CudaPitchedDeviceVariable and allocates the memory on the device + Asynchron copy host to 1D Array - In elements - In elements - Group pack elements as one type. E.g. 4 floats in host code to one float4 in device code + + - + - Creates a new CudaPitchedDeviceVariable from an existing CUdeviceptr - The CUdeviceptr won't be freed when disposing. 
+ Asynchron copy host to 1D Array - - In elements - In elements - In bytes + + - + - Creates a new CudaPitchedDeviceVariable from an existing CUdeviceptr + Asynchron copy host to 1D Array - - In elements - In elements - In bytes - The CUdeviceptr will be freed while disposing if the CudaPitchedDeviceVariable is the owner + + + - + - For dispose + Asynchron copy 1D Array to host + + + - + - Dispose + Asynchron copy 1D Array to host + + - + - For IDisposable + Asynchron copy 1D Array to host - + + - + - Copy from device to device memory + Asynchron copy 1D Array to host - Source + + + - + - Copy from device to device memory + Asynchron Copy host to device - Source - Source X in bytes - Source Y - Destination X in bytes - Destination Y - Width in bytes - Height in elements - Source pitch - Destination pitch + + - + - Copy from device to device memory + Asynchron Copy host to device - Source + + - + - Copy from device to device memory + Asynchron copy device to host - Source - Source X in bytes - Source Y - Destination X in bytes - Destination Y - Width in bytes - Height in elements - Source pitch - Destination pitch + + - + - Copy from device to device memory + Asynchron copy device to host - Source + + - + - Copy from device to device memory + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag - Source - Source pitch + Device Pointer - + - Copy from device to device memory + Page-locks the memory range specified by p and bytesize and maps it + for the device(s) as specified by Flags. This memory range also is added + to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate + calls to functions such as . Since the memory can be accessed + directly by the device, it can be read or written with much higher bandwidth + than pageable memory that has not been registered. Page-locking excessive + amounts of memory may degrade system performance, since it reduces the amount + of memory available to the system for paging. As a result, this function is + best used sparingly to register staging areas for data exchange between + host and device. + The pointer p and size bytesize must be aligned to the host page size (4 KB). + The memory page-locked by this function must be unregistered with - Source - Source X in bytes - Source Y - Destination X in bytes - Destination Y - Width in bytes - Height in elements - Source pitch - Destination pitch + - - - Copy from Host to device memory + + + Unmaps the memory range whose base address is specified by p, and makes it pageable again. + The base address must be the same one specified to . - Source - + - Copy from Host to device memory + A variable located in page locked (pinned) host memory. Use this type of variabe for asynchronous memcpy. + cuMemHostRegister doesn't work with managed memory (e.g. normal C# arrays). But you can use cuMemHostRegister for + natively allocated memory (Marshal.AllocHGlobal, or a native dll). + Type: cuFloatReal - Source - Width in bytes - Height in elements - + - Copy from host to device memory + Creates a new CudaRegisteredHostMemory_cuFloatReal from an existing IntPtr. IntPtr must be page size aligned (4KBytes)! 
- Source - Source X in bytes - Source Y - Destination X in bytes - Destination Y - Width in bytes - Height in elements - Source pitch - Destination pitch + must be page size aligned (4KBytes) + In elements - + - Copy from Host to device memory + For dispose - Source - + - Copy from Host to device memory + Dispose - Source - Width in elements - Height in elements - + - Copy from host to device memory + For IDisposable - Source - Source X in bytes - Source Y - Destination X in bytes - Destination Y - Width in bytes - Height in elements - Source pitch - Destination pitch + - + - Copy from Host to device memory. Assumes that aHostDest has no additional line padding. + Pointer to pinned host memory. - Source - + - Copy from host to device memory + Size in bytes - Source - Source X in bytes - Source Y - Destination X in bytes - Destination Y - Width in bytes - Height in elements - Source pitch - Destination pitch - + - Copy data from device to host memory + Size in elements - IntPtr to destination in host memory - + - Copy data from device to host memory + Returns register status - IntPtr to destination in host memory - Width in bytes - Height in elements - + - Copy data from device to host memory + Access array per element. - Destination - Source X in bytes - Source Y - Destination X in bytes - Destination Y - Width in bytes - Height in elements - Source pitch - Destination pitch + index in elements + - + - Copy data from device to host memory + Synchron copy host to 1D Array - Destination + + - + - Copy data from this device to host memory + Synchron copy host to 1D Array - Destination - Width in elements - Height in elements + - + - Copy data from device to host memory + Synchron copy host to 1D Array - Destination - Source X in bytes - Source Y - Destination X in bytes - Destination Y - Width in bytes - Height in elements - Source pitch - Destination pitch + - + - Copy data from device to host memory. Assumes that aHostDest has no additional line padding. 
+ Synchron copy host to 1D Array - Destination + + - + - Copy data from device to host memory + Synchron copy 1D Array to host - Destination - Source X in bytes - Source Y - Destination X in bytes - Destination Y - Width in bytes - Height in elements - Source pitch - Destination pitch + + - + - Async Copy data from device to device memory + Synchron copy 1D Array to host - Source pointer to device memory - + - + - Async Copy data from device to device memory + Synchron copy 1D Array to host - Source - + - + - Async Copy from device to device memory + Synchron copy 1D Array to host - Source - + + - + - Async Copy data from device to device memory (1D Copy, copies destination pitch * height bytes data) + Synchron copy host to device - Source pointer to device memory - + - + - Async Copy data from device to device memory (1D Copy, copies destination pitch * height bytes data) + Synchron copy host to device - Source - + - + - Async Copy from device to device memory + Synchron copy device to host - Source - + - + - Memset + Synchron copy device to host - + - + - Memset + Asynchron copy host to 1D Array - + + + - + - Memset + Asynchron copy host to 1D Array - + + - + - Memset + Asynchron copy host to 1D Array - + - + - Memset + Asynchron copy host to 1D Array - + + - + - Memset + Asynchron copy 1D Array to host - + + - + - Copies from device memory in one context to device memory in another context + Asynchron copy 1D Array to host - Destination context - Source pointer to device memory - Source context + + - + - Copies from device memory in one context to device memory in another context + Asynchron copy 1D Array to host - Destination context - Source pointer to device memory - Source context + + - + - Async-Copies from device memory in one context to device memory in another context + Asynchron copy 1D Array to host - Destination context - Source pointer to device memory - Source context + + - + - Async-Copies from device memory in one context to device memory in another context + Asynchron Copy host to device - Destination context - Source pointer to device memory - Source context + - + - Access array elements directly from host. - Each single access invokes a device to host or host to device copy. Access is therefor rather slow. + Asynchron Copy host to device - X-index in elements - Y-index in elements - + + - + - Device pointer + Asynchron copy device to host + + - + - Width in elements + Asynchron copy device to host + + - + - Width in bytes + Returns the CUdeviceptr for pinned host memory mapped to device memory space. Only valid if context is created with flag + Device Pointer - + - Height in elements + Page-locks the memory range specified by p and bytesize and maps it + for the device(s) as specified by Flags. This memory range also is added + to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate + calls to functions such as . Since the memory can be accessed + directly by the device, it can be read or written with much higher bandwidth + than pageable memory that has not been registered. Page-locking excessive + amounts of memory may degrade system performance, since it reduces the amount + of memory available to the system for paging. As a result, this function is + best used sparingly to register staging areas for data exchange between + host and device. + The pointer p and size bytesize must be aligned to the host page size (4 KB). 
[ManagedCuda XML documentation diff: doc comments for page-locked / registered host memory (the CudaRegisteredHostMemory_* classes: registration via cuMemHostRegister, unregistration, device-pointer lookup, and synchronous and asynchronous copies between host memory, CUDA arrays and device memory), the CUDA-event stopwatch (elapsed time between Start() and Stop()), the CudaStream wrapper (creation with flags and priorities, Synchronize, Query, WaitEvent, AddCallback, and the WaitValue/WriteValue stream memory operations), CudaSurfObject, CudaTexObject and CudaLinearTexture1D.]
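As a rough illustration of the CudaStream entries above, a minimal usage sketch (assuming the ManagedCuda package and a CUDA context already bound to the calling thread; the CUStreamFlags constructor overload may differ between ManagedCuda versions):

using ManagedCuda;
using ManagedCuda.BasicTypes;

// Sketch only: create a stream, enqueue work, synchronize, dispose.
class StreamSketch
{
    static void Demo()
    {
        // Wraps cuStreamCreate; NonBlocking avoids implicit synchronization
        // with the NULL stream.
        using (var stream = new CudaStream(CUStreamFlags.NonBlocking))
        {
            // ... enqueue asynchronous copies / kernel launches on `stream` ...

            // Wraps cuStreamSynchronize: blocks until all enqueued work is done.
            stream.Synchronize();
        } // Dispose destroys the underlying CUstream.
    }
}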
[Continued: doc comments for the texture-reference binding helpers (binding CudaDeviceVariable, CudaPitchedDeviceVariable, CudaArray1D/2D/3D and CudaMipmappedArray to texture references, including the overloads that set an RGBA border color together with CU_TR_ADDRESS_MODE_BORDER), the CudaArrayTexture1D/2D/3D classes, and the CudaDeviceProperties / device-attribute listing (compute capability, clock rates, memory sizes, maximum texture and surface dimensions, multiprocessor count, unified addressing, cooperative launch and related attributes).]
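The device-property entries correspond to cuDeviceGetAttribute / cuDeviceGetProperties; a hedged sketch of reading them through ManagedCuda (GetDeviceCount, GetDeviceInfo and the property names are taken from the wrapper but may vary by version):

using System;
using ManagedCuda;

// Sketch only: enumerate CUDA devices and print a few documented properties.
class DeviceInfoSketch
{
    static void Main()
    {
        int count = CudaContext.GetDeviceCount();      // wraps cuDeviceGetCount
        for (int i = 0; i < count; i++)
        {
            var props = CudaContext.GetDeviceInfo(i);  // wraps cuDeviceGetProperties / attributes
            Console.WriteLine($"{i}: {props.DeviceName}, {props.MultiProcessorCount} SMs, " +
                              $"{props.TotalGlobalMemory} bytes global memory");
        }
    }
}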
[Continued: doc comments for Direct3D 9/10/11 interoperability (looking up the CUDA device for a D3D adapter, enumerating CUDA devices for a D3D device, creating an interop context, registering D3D resources and their type restrictions, and retrieving the D3D device of a context), for CudaDeviceVariable<T> (allocation, construction from an existing CUdeviceptr or from a module-level variable, and synchronous host/device copies), and for the start of the driver-API wrapper (cuInit, driver-version query, device enumeration).]
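For the CudaDeviceVariable<T> entries, a minimal round-trip sketch (assumes a CudaContext has already been created on the calling thread, e.g. as in the context sketch further below; exact overloads vary between ManagedCuda versions):

using ManagedCuda;

// Sketch only: allocate device memory and round-trip data through the
// synchronous CopyToDevice / CopyToHost methods documented above.
class DeviceVariableSketch
{
    static void Demo()
    {
        var host = new float[1024];
        for (int i = 0; i < host.Length; i++) host[i] = i;

        // Allocation wraps cuMemAlloc; the size is given in elements.
        using (var dev = new CudaDeviceVariable<float>(host.Length))
        {
            dev.CopyToDevice(host);        // synchronous host -> device copy
            var roundTrip = new float[host.Length];
            dev.CopyToHost(roundTrip);     // synchronous device -> host copy
        } // Dispose frees the device allocation (cuMemFree).
    }
}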
[Continued: doc comments for the rest of the driver-API wrapper (device name, UUID/LUID and PCI-bus lookup, total memory, properties and attributes, IPC event and memory handles, and context management: create/destroy, attach/detach, push/pop/set current, synchronize, API version, cache and shared-memory configuration, stream priority range), together with CudaDeviceVariable<T> asynchronous copies, memset overloads, peer-context copies, stream-ordered allocation and free (cuMemAllocAsync semantics) and memory-pool export.]
- Returns in leastPriority and greatestPriority the numerical values that correspond - to the least and greatest stream priorities respectively. Stream priorities - follow a convention where lower numbers imply greater priorities. The range of - meaningful stream priorities is given by [greatestPriority, leastPriority]. - If the user attempts to create a stream with a priority value that is - outside the meaningful range as specified by this API, the priority is - automatically clamped down or up to either leastPriority or greatestPriority - respectively. See ::cuStreamCreateWithPriority for details on creating a - priority stream. - A NULL may be passed in for leastPriority or greatestPriority if the value - is not desired. - This function will return '0' in both leastPriority and greatestPriority if - the current context's device does not support stream priorities - (see ::cuDeviceGetAttribute). + Access array elements directly from host. + Each single access invokes a device to host or host to device copy. Access is therefor rather slow. - Pointer to an int in which the numerical value for least - stream priority is returned - Pointer to an int in which the numerical value for greatest stream priority is returned + index in elements - + - Returns the flags for the current context - Returns in \p *flags the flags of the current context. See ::cuCtxCreate for flag values. + Device pointer - Pointer to store flags of current context - - + - Retain the primary context on the GPU. - Retains the primary context on the device, creating it if necessary, - increasing its usage count. The caller must call - ::cuDevicePrimaryCtxRelease() when done using the context. - Unlike ::cuCtxCreate() the newly created context is not pushed onto the stack. - - Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode of - the device is ::CU_COMPUTEMODE_PROHIBITED. Similarly, context creation will - also fail with ::CUDA_ERROR_UNKNOWN if the compute mode for the device is - set to ::CU_COMPUTEMODE_EXCLUSIVE and there is already an active, non-primary, - context on the device. The function ::cuDeviceGetAttribute() can be used with - ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the - device. The nvidia-smi tool can be used to set the compute mode for - devices. Documentation for nvidia-smi can be obtained by passing a - -h option to it. - - Please note that the primary context always supports pinned allocations. Other - flags can be specified by ::cuDevicePrimaryCtxSetFlags(). + Size in bytes - Returned context handle of the new context - Device for which primary context is requested - - + - Release the primary context on the GPU - Releases the primary context interop on the device by decreasing the usage - count by 1. If the usage drops to 0 the primary context of device \p dev - will be destroyed regardless of how many threads it is current to. - - Please note that unlike ::cuCtxDestroy() this method does not pop the context - from stack in any circumstances. + Type size in bytes - Device which primary context is released - - + - Set flags for the primary context - Sets the flags for the primary context on the device overwriting perviously - set ones. If the primary context is already created - ::CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE is returned. - - The three LSBs of the \p flags parameter can be used to control how the OS - thread, which owns the CUDA context at the time of an API call, interacts - with the OS scheduler when waiting for results from the GPU. 
Only one of - the scheduling flags can be set when creating a context. + Size in elements - Device for which the primary context flags are set - New flags for the device - - + - Get the state of the primary context - Returns in \p *flags the flags for the primary context of \p dev, and in - \p *active whether it is active. See ::cuDevicePrimaryCtxSetFlags for flag - values. + If the wrapper class instance is the owner of a CUDA handle, it will be destroyed while disposing. + + + + + Converts a device variable to a host array + + device variable + newly allocated host array with values from device memory + + + + Converts a device variable to a host value. In case of multiple device values, only the first value is copied. + + device variable + newly allocated host variable with value from device memory + + + + Converts a host array to a newly allocated device variable. + + host array + newly allocated device variable with values from host memory + + + + Converts a host array to a newly allocated device variable. + + host array + newly allocated device variable with values from host memory + + + + Gets a null-pointer equivalent + + + + + A CUDA exception is thrown if a CUDA Driver API method call does not return + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - Device to get primary context flags for - Pointer to store flags - Pointer to store context state; 0 = inactive, 1 = active - + - Destroy all allocations and reset all state on the primary context - Explicitly destroys and cleans up all resources associated with the current - device in the current process. + + + + + + - Note that it is responsibility of the calling function to ensure that no - other module in the process is using the device any more. For that reason - it is recommended to use ::cuDevicePrimaryCtxRelease() in most cases. - However it is safe for other modules to call ::cuDevicePrimaryCtxRelease() - even after resetting the device. - Device for which primary context is destroyed + + + + Error name as returned by CUDA driver API + + + + + Error description as returned by CUDA driver API + + + + + Groupes several wrapped CUgraphicsResources together, so that the map() call to the CUDA API can be efficiently on all + resources together. + + + + + Creates a new CudaGraphicsInteropResourceCollection + + + + + For dispose + + + + + Returns the number of resources in the collection + + + + + Adds a new resource to the collection + + + + + + Removes all resources in the collection, an disposes every element. + + + + + Returns true, if the given resource is part of the collection + + - + - Combines all API calls for module management + Throws NotImplementedException. + + - + - Takes a filename fname and loads the corresponding module module into the current context. The CUDA driver API - does not attempt to lazily allocate the resources needed by a module; if the memory for functions and data (constant - and global) needed by the module cannot be allocated, fails. The file should be a cubin file as output - by nvcc or a PTX file, either as output by nvcc or handwrtten. + Removes a resource from the collection. The resource is not disposed. - Returned module - Filename of module to load - CUDA Error Codes: , , , - , , , - , , , - . - Note that this function may also return error codes from previous, asynchronous launches. + + - + - Takes a byte[] as image and loads the corresponding module module into the current context. 
The byte array may be obtained - by mapping a cubin or PTX file, passing a cubin or PTX file as a null-terminated text string. - The byte[] is a replacement for the original pointer. + Dispose - Returned module - Module data to load - CUDA Error Codes: , , , - , , - , , - . - Note that this function may also return error codes from previous, asynchronous launches. - + - Takes a byte[] as image and loads the corresponding module module into the current context. The byte array may be obtained - by mapping a cubin or PTX file, passing a cubin or PTX file as a null-terminated text string. - Options are passed as an array via options and any corresponding parameters are passed - in optionValues. The number of total options is supplied via numOptions. Any outputs will be returned via - optionValues. Supported options are definen in . - The options values are currently passed in IntPtr-type and should then be cast into their real type. This might change in future. + For IDisposable - Returned module - Module data to load - Number of options - Options for JIT - Option values for JIT - CUDA Error Codes: , , , - , , - , , - . - Note that this function may also return error codes from previous, asynchronous launches. + - + - Takes a byte[] as fatCubin and loads the corresponding module module into the current context. The byte[] - represents a fat binary object, which is a collection of different cubin files, all representing the same device code, but - compiled and optimized for different architectures. Prior to CUDA 4.0, there was no documented API for constructing and using - fat binary objects by programmers. Starting with CUDA 4.0, fat binary objects can be constructed by providing the -fatbin option to nvcc. - More information can be found in the nvcc document. + Returns the ICudaGraphicsInteropResource at index index. + + + + + + + Maps all graphics resources for access by CUDA. + The resources may be accessed by CUDA until they are unmapped. The graphics API from which the resource + was registered should not access any resources while they are mapped by CUDA. If an application does + so, the results are undefined. + This function provides the synchronization guarantee that any graphics calls issued before + will complete before any subsequent CUDA work issued in stream begins. + If any of the resources is presently mapped for access by CUDA then exception is thrown. + + + + + Maps all graphics resources for access by CUDA. + The resources may be accessed by CUDA until they are unmapped. The graphics API from which the resource + was registered should not access any resources while they are mapped by CUDA. If an application does + so, the results are undefined. + This function provides the synchronization guarantee that any graphics calls issued before + will complete before any subsequent CUDA work issued in stream begins. + If any of the resources is presently mapped for access by CUDA then exception is thrown. + + + + + + Maps all graphics resources for access by CUDA. + The resources may be accessed by CUDA until they are unmapped. The graphics API from which the resource + was registered should not access any resources while they are mapped by CUDA. If an application does + so, the results are undefined. + This function provides the synchronization guarantee that any graphics calls issued before + will complete before any subsequent CUDA work issued in stream begins. + If any of the resources is presently mapped for access by CUDA then exception is thrown. 
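The resource collection documented above exists so that several interop resources can be mapped and unmapped around one batch of CUDA work instead of one call per resource. The sketch below shows that intended pattern only; it assumes ManagedCuda-style names (the ManagedCuda namespace, MapAllResources and UnMapAllResources as the map/unmap members), so exact signatures may differ.

    using System;
    using ManagedCuda;

    // Hedged usage sketch for CudaGraphicsInteropResourceCollection.
    // MapAllResources/UnMapAllResources are assumed member names; the
    // collection is expected to already contain registered resources.
    static void RunCudaOnGraphicsResources(
        CudaGraphicsInteropResourceCollection resources,
        Action launchKernels)
    {
        // While mapped, the graphics API must not access any of the resources.
        resources.MapAllResources();
        try
        {
            launchKernels();               // CUDA work that reads/writes the mapped resources
        }
        finally
        {
            resources.UnMapAllResources(); // hand the resources back to the graphics API
        }
    }

Per the documentation above, disposing the collection also disposes every contained resource, so ownership of the wrapped resources should be decided before wrapping the call in a using block.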
+ + + + + Unmaps all graphics resources. + Once unmapped, the resources may not be accessed by CUDA until they are mapped again. + This function provides the synchronization guarantee that any CUDA work issued in stream before + will complete before any subsequently issued graphics work begins. + If any of the resources are not presently mapped for access by CUDA then exception is thrown. + + + + + + Helper methods used in the wrapper framework + + + + + Returns the number of channels used in textures depending on the given type. + + Type + Number of channels + + + + Returns the channel size of an CUDA array in bytes. + + Channel format + Size in bytes + + + + CudaLinearTexture2D + + + + + Creates a new 2D texture from linear memory. Allocates a new device variable + + + + + + + + In elements + In elements + + + + Creates a new 2D texture from linear memory. Allocates a new device variable + + + + + + + + + In elements + In elements + + + + Creates a new 2D texture from linear memory. + + + + + + + + + + + + Creates a new 2D texture from linear memory. + + + + + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + TextureReference + + + + + Flags + + + + + AddressMode + + + + + AddressMode + + + + + Format + + + + + Format + + + + + Height + + + + + Width + + + + + ChannelSize + + + + + TotalSizeInBytes + + + + + NumChannels + + + + + Name + + + + + Module + + + + + CUFunction + + + + + Device variable in linear Memory + + + + + Binds a linear address range to the texture reference. + Any previous address or CUDA array state associated with the texture reference is superseded by this function. + Any memory previously bound to the texture reference is unbound. + Size my differ to the previous bound variable, but type must be the same. + + New device variable to bind this texture reference to. + + + + A variable located in CUDA device memory. The data is aligned following + + variable base type + + + + Creates a new CudaPitchedDeviceVariable and allocates the memory on the device + + In elements + In elements + + + + Creates a new CudaPitchedDeviceVariable and allocates the memory on the device + + In elements + In elements + Group pack elements as one type. E.g. 4 floats in host code to one float4 in device code + + + + Creates a new CudaPitchedDeviceVariable from an existing CUdeviceptr + The CUdeviceptr won't be freed when disposing. 
+ + + In elements + In elements + In bytes + + + + Creates a new CudaPitchedDeviceVariable from an existing CUdeviceptr + + + In elements + In elements + In bytes + The CUdeviceptr will be freed while disposing if the CudaPitchedDeviceVariable is the owner + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Copy from device to device memory + + Source + + + + Copy from device to device memory + + Source + Source X in bytes + Source Y + Destination X in bytes + Destination Y + Width in bytes + Height in elements + Source pitch + Destination pitch + + + + Copy from device to device memory + + Source + + + + Copy from device to device memory + + Source + Source X in bytes + Source Y + Destination X in bytes + Destination Y + Width in bytes + Height in elements + Source pitch + Destination pitch + + + + Copy from device to device memory + + Source + + + + Copy from device to device memory + + Source + Source pitch + + + + Copy from device to device memory + + Source + Source X in bytes + Source Y + Destination X in bytes + Destination Y + Width in bytes + Height in elements + Source pitch + Destination pitch + + + + Copy from Host to device memory + + Source + + + + Copy from Host to device memory + + Source + Width in bytes + Height in elements + + + + Copy from host to device memory + + Source + Source X in bytes + Source Y + Destination X in bytes + Destination Y + Width in bytes + Height in elements + Source pitch + Destination pitch + + + + Copy from Host to device memory + + Source + + + + Copy from Host to device memory + + Source + Width in elements + Height in elements + + + + Copy from host to device memory + + Source + Source X in bytes + Source Y + Destination X in bytes + Destination Y + Width in bytes + Height in elements + Source pitch + Destination pitch + + + + Copy from Host to device memory. Assumes that aHostDest has no additional line padding. + + Source + + + + Copy from host to device memory + + Source + Source X in bytes + Source Y + Destination X in bytes + Destination Y + Width in bytes + Height in elements + Source pitch + Destination pitch + + + + Copy data from device to host memory + + IntPtr to destination in host memory + + + + Copy data from device to host memory + + IntPtr to destination in host memory + Width in bytes + Height in elements + + + + Copy data from device to host memory + + Destination + Source X in bytes + Source Y + Destination X in bytes + Destination Y + Width in bytes + Height in elements + Source pitch + Destination pitch + + + + Copy data from device to host memory + + Destination + + + + Copy data from this device to host memory + + Destination + Width in elements + Height in elements + + + + Copy data from device to host memory + + Destination + Source X in bytes + Source Y + Destination X in bytes + Destination Y + Width in bytes + Height in elements + Source pitch + Destination pitch + + + + Copy data from device to host memory. Assumes that aHostDest has no additional line padding. 
+ + Destination + + + + Copy data from device to host memory + + Destination + Source X in bytes + Source Y + Destination X in bytes + Destination Y + Width in bytes + Height in elements + Source pitch + Destination pitch + + + + Async Copy data from device to device memory + + Source pointer to device memory + + + + + Async Copy data from device to device memory + + Source + + + + + Async Copy from device to device memory + + Source + + + + + Async Copy data from device to device memory (1D Copy, copies destination pitch * height bytes data) + + Source pointer to device memory + + + + + Async Copy data from device to device memory (1D Copy, copies destination pitch * height bytes data) + + Source + + + + + Async Copy from device to device memory + + Source + + + + + Memset + + + + + + Memset + + + + + + Memset + + + + + + Memset + + + + + + + Memset + + + + + + + Memset + + + + + + + Copies from device memory in one context to device memory in another context + + Destination context + Source pointer to device memory + Source context + + + + Copies from device memory in one context to device memory in another context + + Destination context + Source pointer to device memory + Source context + + + + Async-Copies from device memory in one context to device memory in another context + + Destination context + Source pointer to device memory + Source context + + + + + Async-Copies from device memory in one context to device memory in another context + + Destination context + Source pointer to device memory + Source context + + + + + Access array elements directly from host. + Each single access invokes a device to host or host to device copy. Access is therefor rather slow. + + X-index in elements + Y-index in elements + + + + + Device pointer + + + + + Width in elements + + + + + Width in bytes + + + + + Height in elements + + + + + Pitch in bytes + + + + + Total size in bytes (Pitch * Height) + + + + + Type size in bytes + + + + + Converts a device variable to a host array + + device variable + newly allocated host array with values from device memory + + + + Measures via CUDA events the timespan between Start() and Stop() calls. + + + + + + + + + + + + + + + + + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + Start measurement + + + + + Stop measurement + + + + + Get elapsed time in milliseconds, sync on stop event + + Elapsed time in ms + + + + Get elapsed time in milliseconds, no sync on stop event + + Elapsed time in ms + + + + Returns the inner start event + + + + + Returns the inner stop event + + + + + Returns the inner stream + + + + + Wrapps a CUstream handle. + In case of a so called NULL stream, use the native CUstream struct instead. + + + + + Creates a new Stream using + + + + + Creates a new wrapper for an existing stream + + + + + Creates a new Stream + + Parameters for stream creation (must be ) + + + + Creates a new Stream using and with the given priority + This API alters the scheduler priority of work in the stream. Work in a higher priority stream + may preempt work already executing in a low priority stream. + priority follows a convention where lower numbers represent higher priorities. + '0' represents default priority. + + Stream priority. Lower numbers represent higher priorities. + + + + Creates a new Stream using and with the given priority + This API alters the scheduler priority of work in the stream. Work in a higher priority stream + may preempt work already executing in a low priority stream. 
+ priority follows a convention where lower numbers represent higher priorities. + '0' represents default priority. + + Stream priority. Lower numbers represent higher priorities. + Parameters for stream creation (must be ) + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + returns the wrapped CUstream handle + + + + + Waits until the device has completed all operations in the stream. If the context was created + with the flag, the CPU thread will block until the stream is finished with all of its + tasks. + + + + + Returns true if all operations in the stream have completed, or + false if not. + + + + + + Make a compute stream wait on an event + Makes all future work submitted to the Stream wait until hEvent + reports completion before beginning execution. This synchronization + will be performed efficiently on the device. + + The stream will wait only for the completion of the most recent + host call to on hEvent. Once this call has returned, + any functions (including and may be + called on hEvent again, and the subsequent calls will not have any + effect on this stream. + + If hStream is 0 (the NULL stream) any future work submitted in any stream + will wait for hEvent to complete before beginning execution. This + effectively creates a barrier for all future work submitted to the context. + + If has not been called on hEvent, this call acts as if + the record has already completed, and so is a functional no-op. + + + + + + Adds a callback to be called on the host after all currently enqueued + items in the stream have completed. For each + cuStreamAddCallback call, the callback will be executed exactly once. + The callback will block later work in the stream until it is finished. + + The callback may be passed or an error code. In the event + of a device error, all subsequently executed callbacks will receive an + appropriate . + + Callbacks must not make any CUDA API calls. Attempting to use a CUDA API + will result in . Callbacks must not perform any + synchronization that may depend on outstanding device work or other callbacks + that are not mandated to run earlier. Callbacks without a mandated order + (in independent streams) execute in undefined order and may be serialized. + + This API requires compute capability 1.1 or greater. See + cuDeviceGetAttribute or ::cuDeviceGetProperties to query compute + capability. Attempting to use this API with earlier compute versions will + return . + + The function to call once preceding stream operations are complete + User specified data to be passed to the callback function. Use GCAlloc to pin a managed object + Callback flags (must be CUStreamAddCallbackFlags.None) + + + + Here the Stream is the NULL stream + Adds a callback to be called on the host after all currently enqueued + items in the stream have completed. For each + cuStreamAddCallback call, the callback will be executed exactly once. + The callback will block later work in the stream until it is finished. + + The callback may be passed or an error code. In the event + of a device error, all subsequently executed callbacks will receive an + appropriate . + + Callbacks must not make any CUDA API calls. Attempting to use a CUDA API + will result in . Callbacks must not perform any + synchronization that may depend on outstanding device work or other callbacks + that are not mandated to run earlier. Callbacks without a mandated order + (in independent streams) execute in undefined order and may be serialized. 
+ + This API requires compute capability 1.1 or greater. See + cuDeviceGetAttribute or ::cuDeviceGetProperties to query compute + capability. Attempting to use this API with earlier compute versions will + return . + + The function to call once preceding stream operations are complete + User specified data to be passed to the callback function. Use GCAlloc to pin a managed object + Callback flags (must be CUStreamAddCallbackFlags.None) + + + + Query the priority of this stream + + the stream's priority + + + + Query the flags of this stream. + + the stream's flags + The value returned in flags is a logical 'OR' of all flags that + were used while creating this stream. + + + + Wait on a memory location + Enqueues a synchronization of the stream on the given memory location. Work + ordered after the operation will block until the given condition on the + memory is satisfied. By default, the condition is to wait for (int32_t)(*addr - value) >= 0, a cyclic greater-or-equal. + + Other condition types can be specified via \p flags. + + If the memory was registered via ::cuMemHostRegister(), the device pointer + should be obtained with::cuMemHostGetDevicePointer(). This function cannot + be used with managed memory(::cuMemAllocManaged). + + Support for this can be queried with ::cuDeviceGetAttribute() and + ::CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS. The only requirement for basic + support is that on Windows, a device must be in TCC mode. + + The memory location to wait on. + The value to compare with the memory location. + See::CUstreamWaitValue_flags. + + + + Wait on a memory location + Enqueues a synchronization of the stream on the given memory location. Work + ordered after the operation will block until the given condition on the + memory is satisfied. By default, the condition is to wait for (int32_t)(*addr - value) >= 0, a cyclic greater-or-equal. + + Other condition types can be specified via \p flags. + + If the memory was registered via ::cuMemHostRegister(), the device pointer + should be obtained with::cuMemHostGetDevicePointer(). This function cannot + be used with managed memory(::cuMemAllocManaged). + + Support for this can be queried with ::cuDeviceGetAttribute() and + ::CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS. The requirements are + compute capability 7.0 or greater, and on Windows, that the device be in + TCC mode. + + The memory location to wait on. + The value to compare with the memory location. + See::CUstreamWaitValue_flags. + + + + Write a value to memory + + Write a value to memory.Unless the ::CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER + flag is passed, the write is preceded by a system-wide memory fence, + equivalent to a __threadfence_system() but scoped to the stream + rather than a CUDA thread. + + If the memory was registered via ::cuMemHostRegister(), the device pointer + should be obtained with::cuMemHostGetDevicePointer(). This function cannot + be used with managed memory(::cuMemAllocManaged). + + Support for this can be queried with ::cuDeviceGetAttribute() and + ::CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS. The only requirement for basic + support is that on Windows, a device must be in TCC mode. + + The device address to write to. + The value to write. + See::CUstreamWriteValue_flags. 
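Taken together, the stream members documented above (creation with an optional priority, Query, Synchronize, callbacks and the memory-operation waits/writes) support a simple poll-or-block pattern on the host. Below is a minimal sketch under the assumption of ManagedCuda-style names (CudaStream, Query, Synchronize); the enqueued work is only indicated.

    using System;
    using ManagedCuda;

    // Minimal host-side pattern for the stream wrapper documented above.
    // CudaStream, Query and Synchronize are assumed member names.
    static void WaitForStream()
    {
        using (var stream = new CudaStream())   // a priority-taking constructor is also documented
        {
            // ... enqueue async copies and kernel launches on 'stream' here ...

            if (!stream.Query())        // true once all enqueued work has completed
            {
                stream.Synchronize();   // block the calling thread until the stream is idle
            }
        }
    }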
+ + + + Write a value to memory + + Write a value to memory.Unless the ::CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER + flag is passed, the write is preceded by a system-wide memory fence, + equivalent to a __threadfence_system() but scoped to the stream + rather than a CUDA thread. + + If the memory was registered via ::cuMemHostRegister(), the device pointer + should be obtained with::cuMemHostGetDevicePointer(). This function cannot + be used with managed memory(::cuMemAllocManaged). + + Support for this can be queried with ::cuDeviceGetAttribute() and + ::CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS. The requirements are + compute capability 7.0 or greater, and on Windows, that the device be in + TCC mode. + + The device address to write to. + The value to write. + See::CUstreamWriteValue_flags. + + + + Copies attributes from source stream to destination stream + Copies attributes from source stream \p src to destination stream \p dst. + Both streams must have the same context. + + Destination stream + + + + Queries stream attribute. + Queries attribute \p attr from \p hStream and stores it in corresponding member of \p value_out. + + + + + + Sets stream attribute. + Sets attribute \p attr on \p hStream from corresponding attribute of + value.The updated attribute will be applied to subsequent work + submitted to the stream. It will not affect previously submitted work. + + + + + + + CudaLinearTexture1D + + + + + Creates a new 1D texture from linear memory. Allocates a new device variable + + + + + + + In elements + + + + Creates a new 1D texture from linear memory. + + + + + + + + + + + For dispose + + + + + Dispose + + + + + For IDisposable + + + + + + TextureReference + + + + + Flags + + + + + AddressMode + + + + + Format + + + + + Filtermode + + + + + Size + + + + + ChannelSize + + + + + TotalSizeInBytes + + + + + NumChannels + + + + + Name + + + + + Module + + + + + CUFunction + + + + + Device variable in linear Memory + + + + + Binds a linear address range to the texture reference. + Any previous address or CUDA array state associated with the texture reference is superseded by this function. + Any memory previously bound to the texture reference is unbound. + Size my differ to the previous bound variable, but type must be the same. + + New device variable to bind this texture reference to. 
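The pitched-variable copy overloads listed earlier are easiest to read as a round trip: allocate width by height elements, push a host array, launch kernels against the pitched pointer, and pull the data back. The sketch below assumes the ManagedCuda-style members CopyToDevice and CopyToHost together with the DevicePointer and Pitch properties named above; the exact overloads may differ.

    using ManagedCuda;

    // Hedged round-trip sketch for CudaPitchedDeviceVariable<T>.
    // CopyToDevice/CopyToHost are assumed overload names; the "no additional
    // line padding" overloads documented above are the ones intended here.
    static float[] RoundTrip2D(float[] hostData, int width, int height)
    {
        using (var devVar = new CudaPitchedDeviceVariable<float>(width, height))
        {
            devVar.CopyToDevice(hostData);   // host -> device, the driver-chosen pitch is handled internally

            // ... launch kernels that index the data via devVar.DevicePointer and devVar.Pitch ...

            var result = new float[width * height];
            devVar.CopyToHost(result);       // device -> host, line padding is stripped again
            return result;
        }
    }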
+ + + + CUDA device properties + + + + + Typical clock frequency in kilohertz + + + + + Maximum block dimensions + + + + + Maximum grid dimensions + + + + + Maximum number of threads per block + + + + + Maximum pitch in bytes allowed by memory copies + + + + + Maximum number of 32-bit registers available per block + + + + + Maximum shared memory available per block in bytes + + + + + Alignment requirement for textures + + + + + Memory available on device for __constant__ variables in a CUDA C kernel in bytes + + + + + Name of the device + + + + + Driver version + + + + + Total amount of global memory on the device + + + + + Number of multiprocessors on device + + + + + Warp size in threads (also called SIMDWith) + + + + + Device can possibly copy memory and execute a kernel concurrently + + + + + Specifies whether there is a run time limit on kernels + + + + + Device is integrated with host memory + + + + + Device can map host memory into CUDA address space + + + + + Compute mode (See CUComputeMode for details) + + + + + Maximum 1D texture width + + + + + Maximum 2D texture width + + + + + Maximum 2D texture height + + + + + Maximum 3D texture width + + + + + Maximum 3D texture height + + + + + Maximum 3D texture depth + + + + + Maximum texture array width + + + + + Maximum texture array height + + + + + Maximum slices in a texture array + + + + + Alignment requirement for surfaces + + + + + Device can possibly execute multiple kernels concurrently + + + + + Device has ECC support enabled + + + + + PCI bus ID of the device + + + + + PCI device ID of the device + + + + + Device is using TCC driver model + + + + + Peak memory clock frequency in kilohertz + + + + + Global memory bus width in bits + + + + + Size of L2 cache in bytes + + + + + Maximum resident threads per multiprocessor + + + + + Number of asynchronous engines + + + + + Device shares a unified address space with the host + + + + + Maximum 1D layered texture width + + + + + Maximum layers in a 1D layered texture + + + + + PCI domain ID of the device + + + + + Pitch alignment requirement for textures + + + + + Maximum cubemap texture width/height + + + + + Maximum cubemap layered texture width/height + + + + + Maximum layers in a cubemap layered texture + + + + + Maximum 1D surface width + + + + + Maximum 2D surface width + + + + + Maximum 2D surface height + + + + + Maximum 3D surface width + + + + + Maximum 3D surface height + + + + + Maximum 3D surface depth + + + + + Maximum 1D layered surface width + + + + + Maximum layers in a 1D layered surface + + + + + Maximum 2D layered surface width + + + + + Maximum 2D layered surface height + + + + + Maximum layers in a 2D layered surface + + + + + Maximum cubemap surface width + + + + + Maximum cubemap layered surface width + + + + + Maximum layers in a cubemap layered surface + + + + + Maximum 1D linear texture width + + + + + Maximum 2D linear texture width + + + + + Maximum 2D linear texture height + + + + + Maximum 2D linear texture pitch in bytes + + + + + Maximum mipmapped 2D texture width + + + + + Maximum mipmapped 2D texture height + + + + + Major compute capability version number + + + + + Minor compute capability version number + + + + + Compute capability version number + + + + + Maximum mipmapped 1D texture width + + + + + Device supports stream priorities + + + + + Device supports caching globals in L1 + + + + + Device supports caching locals in L1 + + + + + Maximum shared memory available per multiprocessor in bytes + + + + + Maximum number of 32-bit registers 
available per multiprocessor + + + + + Device can allocate managed memory on this system + + + + + Device is on a multi-GPU board + + + + + Unique id for a group of devices on the same multi-GPU board + + + + + Link between the device and the host supports native atomic operations (this is a placeholder attribute, and is not supported on any current hardware) + + + + + Ratio of single precision performance (in floating-point operations per second) to double precision performance + + + + + Device supports coherently accessing pageable memory without calling cudaHostRegister on it + + + + + Device can coherently access managed memory concurrently with the CPU + + + + + Device supports compute preemption. + + + + + Device can access host registered memory at the same virtual address as the CPU. + + + + + cuStreamBatchMemOp and related APIs are supported. + + + + + 64-bit operations are supported in ::cuStreamBatchMemOp and related APIs. + + + + + CU_STREAM_WAIT_VALUE_NOR is supported. + + + + + Device supports launching cooperative kernels via ::cuLaunchCooperativeKernel + + + + + Device can participate in cooperative kernels launched via ::cuLaunchCooperativeKernelMultiDevice + + + + + Maximum optin shared memory per block + + + + + Both the ::CU_STREAM_WAIT_VALUE_FLUSH flag and the ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the device. See \ref CUDA_MEMOP for additional details. + + + + + Device supports host memory registration via ::cudaHostRegister. + + + + + Device accesses pageable memory via the host's page tables. + + + + + The host can directly access managed memory on the device without migration. + + + + + Device supports virtual address management APIs like ::cuMemAddressReserve, ::cuMemCreate, ::cuMemMap and related APIs + + + + + Device supports exporting memory to a posix file descriptor with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate + + + + + Device supports exporting memory to a Win32 NT handle with ::cuMemExportToShareableHandle, if requested via ::cuMemCreate + + + + + Device supports exporting memory to a Win32 KMT handle with ::cuMemExportToShareableHandle, if requested ::cuMemCreate + + + + + Maximum number of blocks per multiprocessor + + + + + Device supports compression of memory + + + + + Device's maximum L2 persisting lines capacity setting in bytes + + + + + The maximum value of CUaccessPolicyWindow::num_bytes. + + + + + Device supports specifying the GPUDirect RDMA flag with ::cuMemCreate + + + + + Shared memory reserved by CUDA driver per block in bytes + + + + + Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays + + + + + Device supports using the ::cuMemHostRegister flag CU_MEMHOSTERGISTER_READ_ONLY to register memory that must be mapped as read-only to the GPU + + + + + + + + + + + + + + + + + Direct3D 9 Interoperability + + + + + Direct3D9 Interoperability for CUDA 3.x - Returned module - Fat binary to load - CUDA Error Codes: , , , - , , , - , , - . - Note that this function may also return error codes from previous, asynchronous launches. - + - Unloads a module hmod from the current context. + Returns in pCudaDevice the CUDA-compatible device corresponding to the adapter name pszAdapterName + obtained from EnumDisplayDevices() or IDirect3D9::GetAdapterIdentifier(). + If no device on the adapter with name pszAdapterName is CUDA-compatible, then the call will fail. - Module to unload + Returned CUDA device corresponding to pszAdapterName + Adapter name to query for device CUDA Error Codes: , , , - , . 
+ , . Note that this function may also return error codes from previous, asynchronous launches. - + - Returns in hfunc the handle of the function of name name located in module hmod. If no function of that name - exists, returns . + Gets the CUDA devices corresponding to a Direct3D 9 device + Returns in pCudaDeviceCount the number of CUDA-compatible device corresponding + to the Direct3D 9 device pD3D9Device. + Also returns in pCudaDevices at most cudaDeviceCount of the the CUDA-compatible devices + corresponding to the Direct3D 9 device pD3D9Device. + + If any of the GPUs being used to render pDevice are not CUDA capable then the + call will return . - Returned function handle - Module to retrieve function from - Name of function to retrieve + Returned number of CUDA devices corresponding to pD3D9Device + Returned CUDA devices corresponding to pD3D9Device + The size of the output device array pCudaDevices + Direct3D 9 device to query for CUDA devices + The set of devices to return. CUDA Error Codes: , , , - , , . + , . Note that this function may also return error codes from previous, asynchronous launches. - + - Returns in dptr and bytes the base pointer and size of the global of name name located in module hmod. If no - variable of that name exists, returns . Both parameters dptr - and bytes are optional. If one of them is null, it is ignored. + Creates a new CUDA context, enables interoperability for that context with the Direct3D device pD3DDevice, and + associates the created CUDA context with the calling thread. The created will be returned in pCtx. + Direct3D resources from this device may be registered and mapped through the lifetime of this CUDA context. + If pCudaDevice is non-NULL then the on which this CUDA context was created will be returned in + pCudaDevice. + On success, this call will increase the internal reference count on pD3DDevice. This reference count will be decremented + upon destruction of this context through . This context will cease to function if pD3DDevice + is destroyed or encounters an error. - Returned global device pointer - Returned global size in bytes - Module to retrieve global from - Name of global to retrieve + Returned newly created CUDA context + Returned pointer to the device on which the context was created + Context creation flags (see for details) + Direct3D device to create interoperability context with CUDA Error Codes: , , , - , , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Returns in pTexRef the handle of the texture reference of name name in the module hmod. If no texture reference - of that name exists, returns . This texture reference handle - should not be destroyed, since it will be destroyed when the module is unloaded. + Creates a new CUDA context, enables interoperability for that context with the Direct3D device pD3DDevice, and + associates the created CUDA context with the calling thread. The created will be returned in pCtx. + Direct3D resources from this device may be registered and mapped through the lifetime of this CUDA context. + On success, this call will increase the internal reference count on pD3DDevice. This reference count will be decremented + upon destruction of this context through . This context will cease to function if pD3DDevice + is destroyed or encounters an error. 
- Returned texture reference - Module to retrieve texture reference from - Name of texture reference to retrieve + Returned newly created CUDA context + Context creation flags (see for details) + Direct3D device to create interoperability context with + Returned pointer to the device on which the context was created CUDA Error Codes: , , , - , , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Returns in pSurfRef the handle of the surface reference of name name in the module hmod. If no surface reference - of that name exists, returns . + Registers the Direct3D 9 resource pD3DResource for access by CUDA and returns a CUDA handle to + pD3Dresource in pCudaResource. The handle returned in pCudaResource may be used to map and + unmap this resource until it is unregistered. On success this call will increase the internal reference count on + pD3DResource. This reference count will be decremented when this resource is unregistered through . + This call is potentially high-overhead and should not be called every frame in interactive applications. + The type of pD3DResource must be one of the following: + + Type of pD3DResourceRestriction + IDirect3DVertexBuffer9 + May be accessed through a device pointer. + + IDirect3DIndexBuffer9 + May be accessed through a device pointer. + + IDirect3DSurface9 + May be accessed through an array. Only stand-alone objects of type IDirect3DSurface9 + may be explicitly shared. In particular, individual mipmap levels and faces of cube maps may not be registered + directly. To access individual surfaces associated with a texture, one must register the base texture object. + + IDirect3DBaseTexture9 + Individual surfaces on this texture may be accessed through an array. + + + The Flags argument may be used to specify additional parameters at register time. The only valid value for this + parameter is . + Not all Direct3D resources of the above types may be used for interoperability with CUDA. The following are some + limitations. + • The primary rendertarget may not be registered with CUDA. + • Resources allocated as shared may not be registered with CUDA. + • Textures which are not of a format which is 1, 2, or 4 channels of 8, 16, or 32-bit integer or floating-point data + cannot be shared. + • Surfaces of depth or stencil formats cannot be shared. + If Direct3D interoperability is not initialized for this context using then + is returned. If pD3DResource is of incorrect type or is already registered then + is returned. If pD3DResource cannot be registered then + is returned. If Flags is not one of the above specified value then + is returned. - Returned surface reference - Module to retrieve surface reference from - Name of surface reference to retrieve + Returned graphics resource handle + Direct3D resource to register + Parameters for resource registration CUDA Error Codes: , , , - , , . + , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Creates a pending JIT linker invocation. - If the call is successful, the caller owns the returned CUlinkState, which should eventually be destroyed with ::cuLinkDestroy. - The device code machine size (32 or 64 bit) will match the calling application. - Both linker and compiler options may be specified. Compiler options will be applied to inputs to this linker action which must - be compiled from PTX. 
The options ::CU_JIT_WALL_TIME, - ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, and ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES will accumulate data until the CUlinkState is destroyed. - optionValues must remain valid for the life of the CUlinkState if output options are used. No other references to inputs are maintained after this call returns. + Returns in ppD3DDevice the Direct3D device against which this CUDA context + was created in . - Size of options arrays - Array of linker and compiler options - Array of option values, each cast to void * - On success, this will contain a CUlinkState to specify and complete this action - + Returned Direct3D device corresponding to CUDA context + CUDA Error Codes: , , , + . + Note that this function may also return error codes from previous, asynchronous launches. - + - Add an input to a pending linker invocation. - Ownership of data data is retained by the caller. No reference is retained to any inputs after this call returns. - This method accepts only compiler options, which are used if the data must be compiled from PTX, and does not accept any of - ::CU_JIT_WALL_TIME, ::CU_JIT_INFO_LOG_BUFFER, ::CU_JIT_ERROR_LOG_BUFFER, ::CU_JIT_TARGET_FROM_CUCONTEXT, or ::CU_JIT_TARGET. + Direct3D 10 Interoperability - A pending linker action. - The type of the input data. - The input data. PTX must be NULL-terminated. - The length of the input data. - An optional name for this input in log messages. - Size of options. - Options to be applied only for this input (overrides options from ::cuLinkCreate). - Array of option values, each cast to void *. - - + - Add a file input to a pending linker invocation. - No reference is retained to any inputs after this call returns. - This method accepts only compiler options, which are used if the data must be compiled from PTX, and does not accept any of - ::CU_JIT_WALL_TIME, ::CU_JIT_INFO_LOG_BUFFER, ::CU_JIT_ERROR_LOG_BUFFER, ::CU_JIT_TARGET_FROM_CUCONTEXT, or ::CU_JIT_TARGET. - This method is equivalent to invoking ::cuLinkAddData on the contents of the file. + Direct3D10 Interoperability for CUDA 3.x - A pending linker action. - The type of the input data. - Path to the input file. - Size of options. - Options to be applied only for this input (overrides options from ::cuLinkCreate). - Array of option values, each cast to void *. - - + - Complete a pending linker invocation. - Completes the pending linker action and returns the cubin image for the linked - device code, which can be used with ::cuModuleLoadData. The cubin is owned by - state, so it should be loaded before state is destroyed via ::cuLinkDestroy. - This call does not destroy state. + Returns in device the CUDA-compatible device corresponding to the adapter pAdapter obtained from + IDXGIFactory::EnumAdapters. This call will succeed only if a device on adapter pAdapter is Cuda-compatible. - A pending linker invocation - On success, this will point to the output image - Optional parameter to receive the size of the generated image - + Returned CUDA device corresponding to pszAdapterName + Adapter (type: IDXGIAdapter) + CUDA Error Codes: , , , + , . + Note that this function may also return error codes from previous, asynchronous launches. - + - Destroys state for a JIT linker invocation. + Gets the CUDA devices corresponding to a Direct3D 10 device + Returns in pCudaDeviceCount the number of CUDA-compatible device corresponding + to the Direct3D 10 device pD3D10Device. 
+ Also returns in pCudaDevices at most cudaDeviceCount of the the CUDA-compatible devices + corresponding to the Direct3D 10 device pD3D10Device. + + If any of the GPUs being used to render pDevice are not CUDA capable then the + call will return . - State object for the linker invocation - + Returned number of CUDA devices corresponding to pD3D9Device + Returned CUDA devices corresponding to pD3D9Device + The size of the output device array pCudaDevices + Direct3D 10 device to query for CUDA devices + The set of devices to return. + CUDA Error Codes: , , , + , . + Note that this function may also return error codes from previous, asynchronous launches. - + - Combines all API calls for memory management + Creates a new CUDA context, enables interoperability for that context with the Direct3D device pD3DDevice, and + associates the created CUDA context with the calling thread. The created will be returned in pCtx. + Direct3D resources from this device may be registered and mapped through the lifetime of this CUDA context. + If pCudaDevice is non-NULL then the on which this CUDA context was created will be returned in + pCudaDevice. + On success, this call will increase the internal reference count on pD3DDevice. This reference count will be decremented + upon destruction of this context through . This context will cease to function if pD3DDevice + is destroyed or encounters an error. + Returned newly created CUDA context + Returned pointer to the device on which the context was created + Context creation flags (see for details) + Direct3D device to create interoperability context with + CUDA Error Codes: , , , + , , . + Note that this function may also return error codes from previous, asynchronous launches. - + - Returns in free and total respectively, the free and total amount of memory available for allocation by the - CUDA context, in bytes. + Creates a new CUDA context, enables interoperability for that context with the Direct3D device pD3DDevice, and + associates the created CUDA context with the calling thread. The created will be returned in pCtx. + Direct3D resources from this device may be registered and mapped through the lifetime of this CUDA context. + On success, this call will increase the internal reference count on pD3DDevice. This reference count will be decremented + upon destruction of this context through . This context will cease to function if pD3DDevice + is destroyed or encounters an error. - Returned free memory in bytes - Returned total memory in bytes + Returned newly created CUDA context + Context creation flags (see for details) + Direct3D device to create interoperability context with + Returned pointer to the device on which the context was created CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Allocates bytesize bytes of linear memory on the device and returns in dptr a pointer to the allocated memory. - The allocated memory is suitably aligned for any kind of variable. The memory is not cleared. If bytesize is 0, - returns . + Registers the Direct3D 10 resource pD3DResource for access by CUDA and returns a CUDA handle to + pD3Dresource in pCudaResource. The handle returned in pCudaResource may be used to map and + unmap this resource until it is unregistered. On success this call will increase the internal reference count on + pD3DResource. This reference count will be decremented when this resource is unregistered through . 
+ This call is potentially high-overhead and should not be called every frame in interactive applications. + The type of pD3DResource must be one of the following: + + Type of pD3DResourceRestriction + ID3D10Buffer + May be accessed through a device pointer. + + ID3D10Texture1D + Individual subresources of the texture may be accessed via arrays. + + ID3D10Texture2D + Individual subresources of the texture may be accessed via arrays. + + ID3D10Texture3D + Individual subresources of the texture may be accessed via arrays. + + + The Flags argument may be used to specify additional parameters at register time. The only valid value for this + parameter is . + Not all Direct3D resources of the above types may be used for interoperability with CUDA. The following are some + limitations. + • The primary rendertarget may not be registered with CUDA. + • Resources allocated as shared may not be registered with CUDA. + • Textures which are not of a format which is 1, 2, or 4 channels of 8, 16, or 32-bit integer or floating-point data + cannot be shared. + • Surfaces of depth or stencil formats cannot be shared. + If Direct3D interoperability is not initialized for this context using then + is returned. If pD3DResource is of incorrect type or is already registered then + is returned. If pD3DResource cannot be registered then + is returned. If Flags is not one of the above specified value then + is returned. - Returned device pointer - Requested allocation size in bytes + Returned graphics resource handle + Direct3D resource to register + Parameters for resource registration CUDA Error Codes: , , , - , , . + , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Allocates at least WidthInBytes * Height bytes of linear memory on the device and returns in dptr a pointer - to the allocated memory. The function may pad the allocation to ensure that corresponding pointers in any given - row will continue to meet the alignment requirements for coalescing as the address is updated from row to row. - ElementSizeBytes specifies the size of the largest reads and writes that will be performed on the memory range. - ElementSizeBytes may be 4, 8 or 16 (since coalesced memory transactions are not possible on other data sizes). If - ElementSizeBytes is smaller than the actual read/write size of a kernel, the kernel will run correctly, but possibly - at reduced speed. The pitch returned in pPitch by is the width in bytes of the allocation. The - intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array. - Given the row and column of an array element of type T, the address is computed as: - T * pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; - The pitch returned by is guaranteed to work with under all circumstances. For - allocations of 2D arrays, it is recommended that programmers consider performing pitch allocations using . - Due to alignment restrictions in the hardware, this is especially true if the application will be performing - 2D memory copies between different regions of device memory (whether linear memory or CUDA arrays). - The byte alignment of the pitch returned by is guaranteed to match or exceed the alignment - requirement for texture binding with . + Returns in ppD3DDevice the Direct3D device against which this CUDA context + was created in . 
- Returned device pointer - Returned pitch of allocation in bytes - Requested allocation width in bytes - Requested allocation height in rows - Size of largest reads/writes for range + Returned Direct3D device corresponding to CUDA context CUDA Error Codes: , , , - , , . + . Note that this function may also return error codes from previous, asynchronous launches. - + - Frees the memory space pointed to by dptr, which must have been returned by a previous call to or - . + Direct3D 11 Interoperability for CUDA 3.x - Pointer to memory to free + + + + Returns in device the CUDA-compatible device corresponding to the adapter pAdapter obtained from + IDXGIFactory::EnumAdapters. This call will succeed only if a device on adapter pAdapter is Cuda-compatible. + + Returned CUDA device corresponding to pszAdapterName + Adapter (type: IDXGIAdapter) CUDA Error Codes: , , , - , . + , . Note that this function may also return error codes from previous, asynchronous launches. - + - Returns the base address in pbase and size in psize of the allocation by or - that contains the input pointer dptr. Both parameters pbase and psize are optional. If one of them is null, it is - ignored. + Gets the CUDA devices corresponding to a Direct3D 11 device + Returns in pCudaDeviceCount the number of CUDA-compatible device corresponding + to the Direct3D 11 device pD3D11Device. + Also returns in pCudaDevices at most cudaDeviceCount of the the CUDA-compatible devices + corresponding to the Direct3D 11 device pD3D11Device. + + If any of the GPUs being used to render pDevice are not CUDA capable then the + call will return . - Returned base address - Returned size of device memory allocation - Device pointer to query + Returned number of CUDA devices corresponding to pD3D9Device + Returned CUDA devices corresponding to pD3D11Device + The size of the output device array pCudaDevices + Direct3D 11 device to query for CUDA devices + The set of devices to return. CUDA Error Codes: , , , - , . + , . Note that this function may also return error codes from previous, asynchronous launches. - + - Allocates bytesize bytes of host memory that is page-locked and accessible to the device. The driver tracks the virtual - memory ranges allocated with this function and automatically accelerates calls to functions such as . - Since the memory can be accessed directly by the device, it can be read or written with much higher bandwidth than - pageable memory obtained with functions such as malloc(). Allocating excessive amounts of memory with - may degrade system performance, since it reduces the amount of memory available to the system for paging. - As a result, this function is best used sparingly to allocate staging areas for data exchange between host and device. + Creates a new CUDA context, enables interoperability for that context with the Direct3D device pD3DDevice, and + associates the created CUDA context with the calling thread. The created will be returned in pCtx. + Direct3D resources from this device may be registered and mapped through the lifetime of this CUDA context. + If pCudaDevice is non-NULL then the on which this CUDA context was created will be returned in + pCudaDevice. + On success, this call will increase the internal reference count on pD3DDevice. This reference count will be decremented + upon destruction of this context through . This context will cease to function if pD3DDevice + is destroyed or encounters an error. 
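The cuMemAllocPitch entry above gives the address formula for elements of a pitched 2D allocation. The sketch below shows that computation in C against cuda.h, assuming an illustrative 640x480 array of float; the row/column indices are arbitrary and error checking is omitted.

#include <cuda.h>

int main(void)
{
    CUdevice dev; CUcontext ctx;
    CUdeviceptr base; size_t pitch;
    size_t widthInBytes = 640 * sizeof(float), height = 480;   /* illustrative 2D extent */

    cuInit(0);
    cuDeviceGet(&dev, 0);
    cuCtxCreate(&ctx, 0, dev);

    /* ElementSizeBytes = 4: the largest per-element read/write the kernels will perform */
    cuMemAllocPitch(&base, &pitch, widthInBytes, height, 4);

    /* Address of element (row, column), exactly as the documented formula computes it:
       T *pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column                     */
    size_t row = 10, column = 20;
    CUdeviceptr element = base + row * pitch + column * sizeof(float);
    (void)element;                                   /* would be passed to a kernel or a memcpy */

    cuMemFree(base);
    cuCtxDestroy(ctx);
    return 0;
}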
- Returned host pointer to page-locked memory - Requested allocation size in bytes + Returned newly created CUDA context + Returned pointer to the device on which the context was created + Context creation flags (see for details) + Direct3D device to create interoperability context with CUDA Error Codes: , , , - , , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Frees the memory space pointed to by p, which must have been returned by a previous call to . + Creates a new CUDA context, enables interoperability for that context with the Direct3D device pD3DDevice, and + associates the created CUDA context with the calling thread. The created will be returned in pCtx. + Direct3D resources from this device may be registered and mapped through the lifetime of this CUDA context. + On success, this call will increase the internal reference count on pD3DDevice. This reference count will be decremented + upon destruction of this context through . This context will cease to function if pD3DDevice + is destroyed or encounters an error. - Pointer to memory to free + Returned newly created CUDA context + Context creation flags (see for details) + Direct3D device to create interoperability context with + Returned pointer to the device on which the context was created CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Allocates bytesize bytes of host memory that is page-locked and accessible to the device. The driver tracks the virtual - memory ranges allocated with this function and automatically accelerates calls to functions such as . - Since the memory can be accessed directly by the device, it can be read or written with much higher bandwidth than - pageable memory obtained with functions such as malloc(). Allocating excessive amounts of pinned - memory may degrade system performance, since it reduces the amount of memory available to the system for paging. - As a result, this function is best used sparingly to allocate staging areas for data exchange between host and device. - For the Flags parameter see . - The CUDA context must have been created with the flag in order for the - flag to have any effect. - The flag may be specified on CUDA contexts for devices that do not support - mapped pinned memory. The failure is deferred to because the memory may be - mapped into other CUDA contexts via the flag. - The memory allocated by this function must be freed with . - Note all host memory allocated using will automatically - be immediately accessible to all contexts on all devices which support unified - addressing (as may be queried using ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING). - Unless the flag ::CU_MEMHOSTALLOC_WRITECOMBINED is specified, the device pointer - that may be used to access this host memory from those contexts is always equal - to the returned host pointer pp. If the flag ::CU_MEMHOSTALLOC_WRITECOMBINED - is specified, then the function must be used - to query the device pointer, even if the context supports unified addressing. - See \ref CUDA_UNIFIED for additional details. + Registers the Direct3D 11 resource pD3DResource for access by CUDA and returns a CUDA handle to + pD3Dresource in pCudaResource. The handle returned in pCudaResource may be used to map and + unmap this resource until it is unregistered. On success this call will increase the internal reference count on + pD3DResource. 
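The cuMemHostAlloc entry above explains mapped, page-locked host memory: the context must be created with the map-host flag, and the device-side alias is then queried with cuMemHostGetDevicePointer. A small sketch under those assumptions (flags and buffer size are illustrative; error checking omitted):

#include <cuda.h>

int main(void)
{
    CUdevice dev; CUcontext ctx;
    void *hostPtr; CUdeviceptr devPtr;
    size_t bytes = 1 << 20;                                   /* staging buffer size, arbitrary */

    cuInit(0);
    cuDeviceGet(&dev, 0);
    cuCtxCreate(&ctx, CU_CTX_MAP_HOST, dev);                  /* mapping requires this context flag */

    /* Page-locked host memory that is also mapped into the device address space */
    cuMemHostAlloc(&hostPtr, bytes, CU_MEMHOSTALLOC_DEVICEMAP);
    cuMemHostGetDevicePointer(&devPtr, hostPtr, 0);           /* Flags must currently be 0 */

    /* ... fill hostPtr on the CPU, pass devPtr to a kernel ... */

    cuMemFreeHost(hostPtr);                                   /* pinned memory is freed with cuMemFreeHost */
    cuCtxDestroy(ctx);
    return 0;
}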
This reference count will be decremented when this resource is unregistered through . + This call is potentially high-overhead and should not be called every frame in interactive applications. + The type of pD3DResource must be one of the following: + + Type of pD3DResourceRestriction + ID3D11Buffer + May be accessed through a device pointer. + + ID3D11Texture1D + Individual subresources of the texture may be accessed via arrays. + + ID3D11Texture2D + Individual subresources of the texture may be accessed via arrays. + + ID3D11Texture3D + Individual subresources of the texture may be accessed via arrays. + + + The Flags argument may be used to specify additional parameters at register time. The only valid value for this + parameter is . + Not all Direct3D resources of the above types may be used for interoperability with CUDA. The following are some + limitations. + • The primary rendertarget may not be registered with CUDA. + • Resources allocated as shared may not be registered with CUDA. + • Textures which are not of a format which is 1, 2, or 4 channels of 8, 16, or 32-bit integer or floating-point data + cannot be shared. + • Surfaces of depth or stencil formats cannot be shared. + If Direct3D interoperability is not initialized for this context using then + is returned. If pD3DResource is of incorrect type or is already registered then + is returned. If pD3DResource cannot be registered then + is returned. If Flags is not one of the above specified value then + is returned. - Returned host pointer to page-locked memory - Requested allocation size in bytes - Flags for allocation request + Returned graphics resource handle + Direct3D resource to register + Parameters for resource registration CUDA Error Codes: , , , - , , . + , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Passes back the device pointer pdptr corresponding to the mapped, pinned host buffer p allocated by . - will fail if the flag was not specified at the - time the memory was allocated, or if the function is called on a GPU that does not support mapped pinned memory. - Flags provides for future releases. For now, it must be set to 0. + Returns in ppD3DDevice the Direct3D device against which this CUDA context + was created in . - Returned device pointer - Host pointer - Options (must be 0) + Returned Direct3D device corresponding to CUDA context CUDA Error Codes: , , , - , . + . Note that this function may also return error codes from previous, asynchronous launches. - + + + C# wrapper for the NVIDIA CUDA Driver API (--> cuda.h) + + + + + Gives the version of the wrapped api + + + + + Initializes the driver API and must be called before any other function from the driver API. Currently, + the Flags parameter must be . If has not been called, any function from the driver API will return + . + + Before any call to the CUDA Driver API can be done, the API must be initialized with cuInit(0). + Currently, Flags must always be . + CUDA Error Codes: , , .Note that this function may also return error codes from previous, asynchronous launches. + + + + Returns in driverVersion the version number of the installed CUDA driver. This function automatically returns + if the driverVersion argument is NULL. + + Returns the CUDA driver version + CUDA Error Codes: , .Note that this function may also return error codes from previous, asynchronous launches. + + - Passes back the flags pFlags that were specified when allocating the pinned host buffer p allocated by - . 
- will fail if the pointer does not reside in an allocation performed by or - . + Combines all API calls for device management - Returned flags - Host pointer - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Page-locks the memory range specified by p and bytesize and maps it - for the device(s) as specified by Flags. This memory range also is added - to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate - calls to functions such as . Since the memory can be accessed - directly by the device, it can be read or written with much higher bandwidth - than pageable memory that has not been registered. Page-locking excessive - amounts of memory may degrade system performance, since it reduces the amount - of memory available to the system for paging. As a result, this function is - best used sparingly to register staging areas for data exchange between - host and device. - The pointer p and size bytesize must be aligned to the host page size (4 KB). - The memory page-locked by this function must be unregistered with + Returns in device a device handle given an ordinal in the range [0, -1]. - Host pointer to memory to page-lock - Size in bytes of the address range to page-lock - Flags for allocation request + Returned device handle + Device number to get handle for CUDA Error Codes: , , , - , , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Unmaps the memory range whose base address is specified by p, and makes it pageable again. - The base address must be the same one specified to . + Returns in count the number of devices with compute capability greater than or equal to 2.0 that are available for + execution. If there is no such device, returns 0. - Host pointer to memory to page-lock + Returned number of compute-capable devices CUDA Error Codes: , , , - , , . + , . Note that this function may also return error codes from previous, asynchronous launches. - + - Returns information about a pointer + Returns an ASCII string identifying the device dev in the NULL-terminated string pointed to by name. len specifies + the maximum length of the string that may be returned. - Returned pointer attribute value - Pointer attribute to query - Pointer + Returned identifier string for the device + Maximum length of string to store in name + Device to get identifier string for CUDA Error Codes: , , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Returns information about a pointer + Return an UUID for the device + Returns 16-octets identifing the device \p dev in the structure pointed by the \p uuid. - Returned pointer attribute value - Pointer attribute to query - Pointer - CUDA Error Codes: , , , - , , . - Note that this function may also return error codes from previous, asynchronous launches. + Returned UUID + Device to get identifier string for + - + - Returns information about a pointer + Return an UUID for the device (11.4+) + Returns 16-octets identifing the device \p dev in the structure + pointed by the \p uuid.If the device is in MIG mode, returns its + MIG UUID which uniquely identifies the subscribed MIG compute instance. + Returns 16-octets identifing the device \p dev in the structure pointed by the \p uuid. - Returned pointer attribute value - Pointer attribute to query - Pointer - CUDA Error Codes: , , , - , , . 
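The device-management entries above (cuInit, cuDriverGetVersion, cuDeviceGetCount, cuDeviceGet, cuDeviceGetName) describe the enumeration flow. A minimal sketch in C against cuda.h; error checking is omitted:

#include <cuda.h>
#include <stdio.h>

int main(void)
{
    int count, version;

    cuInit(0);                                 /* required before any other driver API call */
    cuDriverGetVersion(&version);
    cuDeviceGetCount(&count);
    printf("driver %d, %d device(s)\n", version, count);

    for (int i = 0; i < count; ++i)
    {
        CUdevice dev; char name[256];
        cuDeviceGet(&dev, i);                  /* ordinal in [0, count-1] -> device handle */
        cuDeviceGetName(name, (int)sizeof(name), dev);
        printf("device %d: %s\n", i, name);
    }
    return 0;
}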
- Note that this function may also return error codes from previous, asynchronous launches. + Returned UUID + Device to get identifier string for + - + - Returns information about a pointer + Return an LUID and device node mask for the device. + Return identifying information (\p luid and \p deviceNodeMask) to allow + matching device with graphics APIs. - Returned pointer attribute value - Pointer attribute to query - Pointer - CUDA Error Codes: , , , - , , . - Note that this function may also return error codes from previous, asynchronous launches. + Returned LUID + Returned device node mask + Device to get identifier string for + - + - Returns information about a pointer + Returns in bytes the total amount of memory available on the device dev in bytes. - Returned pointer attribute value - Pointer attribute to query - Pointer + Returned memory available on device in bytes + Device handle CUDA Error Codes: , , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Returns information about a pointer + Returns the maximum number of elements allocatable in a 1D linear texture for a given texture element size. + Returns in \p maxWidthInElements the maximum number of texture elements allocatable in a 1D linear texture + for given \p format and \p numChannels. - Returned pointer attribute value - Pointer attribute to query - Pointer - CUDA Error Codes: , , , - , , . - Note that this function may also return error codes from previous, asynchronous launches. + Returned maximum number of texture elements allocatable for given \p format and \p numChannels. + Texture format. + Number of channels per texture element. + Device handle. + - + - Returns information about a pointer + Returns in pi the integer value of the attribute attrib on device dev. See . - Returned pointer attribute value - Pointer attribute to query - Pointer + Returned device attribute value + Device attribute to query + Device handle CUDA Error Codes: , , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Prefetches memory to the specified destination device - Prefetches memory to the specified destination device. devPtr is the - base device pointer of the memory to be prefetched and dstDevice is the - destination device. count specifies the number of bytes to copy. hStream - is the stream in which the operation is enqueued. - - Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. - - If no physical memory has been allocated for this region, then this memory region - will be populated and mapped on the destination device. If there's insufficient - memory to prefetch the desired region, the Unified Memory driver may evict pages - belonging to other memory regions to make room. If there's no memory that can be - evicted, then the Unified Memory driver will prefetch less than what was requested. - - In the normal case, any mappings to the previous location of the migrated pages are - removed and mappings for the new location are only setup on the dstDevice. - The application can exercise finer control on these mappings using ::cudaMemAdvise. + Return NvSciSync attributes that this device can support. + Returns in \p nvSciSyncAttrList, the properties of NvSciSync that + this CUDA device, \p dev can support.The returned \p nvSciSyncAttrList + can be used to create an NvSciSync object that matches this device’s capabilities. 
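The cuDeviceTotalMem and cuDeviceGetAttribute entries above cover per-device property queries. A short sketch follows; the particular attributes queried (multiprocessor count, compute capability) are illustrative choices, not a complete list.

#include <cuda.h>
#include <stdio.h>

int main(void)
{
    CUdevice dev;
    size_t totalBytes;
    int smCount, major, minor;

    cuInit(0);
    cuDeviceGet(&dev, 0);
    cuDeviceTotalMem(&totalBytes, dev);
    cuDeviceGetAttribute(&smCount, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, dev);
    cuDeviceGetAttribute(&major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, dev);
    cuDeviceGetAttribute(&minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, dev);

    printf("%zu bytes, %d SMs, compute capability %d.%d\n",
           totalBytes, smCount, major, minor);
    return 0;
}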
+ + If NvSciSyncAttrKey_RequiredPerm field in \p nvSciSyncAttrList is + already set this API will return ::CUDA_ERROR_INVALID_VALUE. + + The applications should set \p nvSciSyncAttrList to a valid + NvSciSyncAttrList failing which this API will return + ::CUDA_ERROR_INVALID_HANDLE. + + The \p flags controls how applications intends to use + the NvSciSync created from the \p nvSciSyncAttrList. The valid flags are: + - ::CUDA_NVSCISYNC_ATTR_SIGNAL, specifies that the applications intends to + signal an NvSciSync on this CUDA device. + - ::CUDA_NVSCISYNC_ATTR_WAIT, specifies that the applications intends to + wait on an NvSciSync on this CUDA device. + + At least one of these flags must be set, failing which the API + returns::CUDA_ERROR_INVALID_VALUE.Both the flags are orthogonal + to one another: a developer may set both these flags that allows to + set both wait and signal specific attributes in the same \p nvSciSyncAttrList. - Pointer to be prefetched - Size in bytes - Destination device to prefetch to - Stream to enqueue prefetch operation - Note that this function is asynchronous with respect to the host and all work on other devices. + Return NvSciSync attributes supported + Valid Cuda Device to get NvSciSync attributes for. + flags describing NvSciSync usage. - + - Advise about the usage of a given memory range - Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. - - The \p advice parameter can take the following values: - - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read - from and only occasionally written to. This allows the driver to create read-only - copies of the data in a processor's memory when that processor accesses it. Similarly, - if cuMemPrefetchAsync is called on this region, it will create a read-only copy of - the data on the destination processor. When a processor writes to this data, all copies - of the corresponding page are invalidated except for the one where the write occurred. - The \p device argument is ignored for this advice. - - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. Any read - duplicated copies of the data will be freed no later than the next write access to that data. - - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the - data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the - preferred location as CPU memory. Setting the preferred location does not cause data to - migrate to that location immediately. Instead, it guides the migration policy when a fault - occurs on that memory region. If the data is already in its preferred location and the - faulting processor can establish a mapping without requiring the data to be migrated, then - the migration will be avoided. On the other hand, if the data is not in its preferred location - or if a direct mapping cannot be established, then it will be migrated to the processor accessing - it. It is important to note that setting the preferred location does not prevent data prefetching - done using ::cuMemPrefetchAsync. - Having a preferred location can override the thrash detection and resolution logic in the Unified - Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU - memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. 
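The cuDeviceGetMemPool / cuDeviceSetMemPool / cuDeviceGetDefaultMemPool entries above feed the stream-ordered allocator. The sketch below assumes a driver new enough for these calls (CUDA 11.2+); the explicit cuDeviceSetMemPool is redundant here, since the default pool is already current, and is shown only to illustrate the call.

#include <cuda.h>

int main(void)
{
    CUdevice dev; CUcontext ctx; CUstream stream;
    CUmemoryPool pool; CUdeviceptr dptr;

    cuInit(0);
    cuDeviceGet(&dev, 0);
    cuCtxCreate(&ctx, 0, dev);
    cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING);

    cuDeviceGetDefaultMemPool(&pool, dev);     /* pool used by cuMemAllocAsync on this device */
    cuDeviceSetMemPool(dev, pool);             /* make it the device's current pool (a no-op here) */

    cuMemAllocAsync(&dptr, 1 << 20, stream);   /* allocated from the stream device's current pool */
    cuMemFreeAsync(dptr, stream);
    cuStreamSynchronize(stream);

    cuStreamDestroy(stream);
    cuCtxDestroy(ctx);
    return 0;
}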
But - if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. - When the Unified Memory driver has to evict pages from a certain location on account of that - memory being oversubscribed, the preferred location will be used to decide the destination to which - a page should be evicted to. - If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred - location will be ignored for that subset. - - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION - and changes the preferred location to none. - - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. - This does not cause data migration and has no impact on the location of the data per se. Instead, - it causes the data to always be mapped in the specified processor's page tables, as long as the - location of the data permits a mapping to be established. If the data gets migrated for any reason, - the mappings are updated accordingly. - This advice is useful in scenarios where data locality is not important, but avoiding faults is. - Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the - data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data - over to the other GPUs is not as important because the accesses are infrequent and the overhead of - migration may be too high. But preventing faults can still help improve performance, and so having - a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated - to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the - ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the - page in CPU memory. - - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of - mappings may be removed at any time causing accesses to result in page faults. - - Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. + Sets the current memory pool of a device + The memory pool must be local to the specified device. + ::cuMemAllocAsync allocates from the current mempool of the provided stream's device. + By default, a device's current memory pool is its default memory pool. - Note that this function is asynchronous with respect to the host and all work - on other devices. + note Use ::cuMemAllocFromPoolAsync to specify asynchronous allocations from a device different than the one the stream runs on. - Pointer to memory to set the advice for - Size in bytes of the memory range - Advice to be applied for the specified memory range - Device to apply the advice for + + - + - Query an attribute of a given memory range + Gets the current mempool for a device + Returns the last pool provided to ::cuDeviceSetMemPool for this device + or the device's default memory pool if ::cuDeviceSetMemPool has never been called. + By default the current mempool is the default mempool for a device. + Otherwise the returned pool must have been set with::cuDeviceSetMemPool. - A pointers to a memory location where the result of each attribute query will be written to. - Array containing the size of data - The attribute to query - Start of the range to query - Size of the range to query + + + - + - Query attributes of a given memory range. 
+ Returns the default mempool of a device + The default mempool of a device contains device memory from that device. - A two-dimensional array containing pointers to memory locations where the result of each attribute query will be written to. - Array containing the sizes of each result - An array of attributes to query (numAttributes and the number of attributes in this array should match) - Number of attributes to query - Start of the range to query - Size of the range to query + + + - + - Allocates memory that will be automatically managed by the Unified Memory system - - Allocates bytesize bytes of managed memory on the device and returns in - dptr a pointer to the allocated memory. If the device doesn't support - allocating managed memory, is returned. Support - for managed memory can be queried using the device attribute - . The allocated memory is suitably - aligned for any kind of variable. The memory is not cleared. If bytesize - is 0, ::cuMemAllocManaged returns ::CUDA_ERROR_INVALID_VALUE. The pointer - is valid on the CPU and on all GPUs in the system that support managed memory. - All accesses to this pointer must obey the Unified Memory programming model. - - flags specifies the default stream association for this allocation. - flags must be one of ::CU_MEM_ATTACH_GLOBAL or ::CU_MEM_ATTACH_HOST. If - ::CU_MEM_ATTACH_GLOBAL is specified, then this memory is accessible from - any stream on any device. If ::CU_MEM_ATTACH_HOST is specified, then the - allocation is created with initial visibility restricted to host access only; - an explicit call to ::cuStreamAttachMemAsync will be required to enable access - on the device. - - If the association is later changed via ::cuStreamAttachMemAsync to - a single stream, the default association as specifed during ::cuMemAllocManaged - is restored when that stream is destroyed. For __managed__ variables, the - default association is always ::CU_MEM_ATTACH_GLOBAL. Note that destroying a - stream is an asynchronous operation, and as a result, the change to default - association won't happen until all work in the stream has completed. - - Memory allocated with ::cuMemAllocManaged should be released with ::cuMemFree. - - On a multi-GPU system with peer-to-peer support, where multiple GPUs support - managed memory, the physical storage is created on the GPU which is active - at the time ::cuMemAllocManaged is called. All other GPUs will reference the - data at reduced bandwidth via peer mappings over the PCIe bus. The Unified - Memory management system does not migrate memory between GPUs. - - On a multi-GPU system where multiple GPUs support managed memory, but not - all pairs of such GPUs have peer-to-peer support between them, the physical - storage is created in 'zero-copy' or system memory. All GPUs will reference - the data at reduced bandwidth over the PCIe bus. In these circumstances, - use of the environment variable, CUDA_VISIBLE_DEVICES, is recommended to - restrict CUDA to only use those GPUs that have peer-to-peer support. This - environment variable is described in the CUDA programming guide under the - "CUDA environment variables" section. + Returns in device a device handle given a PCI bus ID string. 
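The cuMemAllocManaged, cuMemAdvise and cuMemPrefetchAsync entries above describe unified (managed) memory and its placement hints. A sketch of that flow, assuming a device that supports managed memory; the advice chosen and the use of the default stream (0) are illustrative.

#include <cuda.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
    CUdevice dev; CUcontext ctx; CUdeviceptr managed;
    size_t bytes = 1 << 20;

    cuInit(0);
    cuDeviceGet(&dev, 0);
    cuCtxCreate(&ctx, 0, dev);

    /* Managed memory is valid on the CPU and on all managed-memory capable GPUs */
    cuMemAllocManaged(&managed, bytes, CU_MEM_ATTACH_GLOBAL);
    memset((void *)(uintptr_t)managed, 0, bytes);            /* direct CPU access is allowed */

    /* Mostly-read data: permit read-only copies, then prefetch to the GPU up front */
    cuMemAdvise(managed, bytes, CU_MEM_ADVISE_SET_READ_MOSTLY, dev);
    cuMemPrefetchAsync(managed, bytes, dev, 0);              /* 0 = default stream */
    cuCtxSynchronize();

    cuMemFree(managed);
    cuCtxDestroy(ctx);
    return 0;
}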
- Returned device pointer - Requested allocation size in bytes - Must be one of or + Returned device handle + String in one of the following forms: + [domain]:[bus]:[device].[function] + [domain]:[bus]:[device] + [bus]:[device].[function] + where domain, bus, device, and function are all hexadecimal values CUDA Error Codes: , , , - , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + , . - + - Set attributes on a previously allocated memory region - The supported attributes are: - : A boolean attribute that can either be set (1) or unset (0). When set, - memory operations that are synchronous. If there are some previously initiated - synchronous memory operations that are pending when this attribute is set, the - function does not return until those memory operations are complete. - See further documentation in the section titled "API synchronization behavior" - to learn more about cases when synchronous memory operations can - exhibit asynchronous behavior. - value will be considered as a pointer to an unsigned integer to which this attribute is to be set. + Returns an ASCII string identifying the device dev in the NULL-terminated + string pointed to by pciBusId. len specifies the maximum length of the + string that may be returned. - Pointer to memory containing the value to be set - Pointer attribute to set - Pointer to a memory region allocated using CUDA memory allocation APIs + Returned identifier string for the device in the following format + [domain]:[bus]:[device].[function] + where domain, bus, device, and function are all hexadecimal values. + pciBusId should be large enough to store 13 characters including the NULL-terminator. + Maximum length of string to store in name + Device to get identifier string for CUDA Error Codes: , , , - , , . + , . - + - Returns information about a pointer. - The supported attributes are (refer to ::cuPointerGetAttribute for attribute descriptions and restrictions): + Takes as input a previously allocated event. This event must have been + created with the ::CU_EVENT_INTERPROCESS and ::CU_EVENT_DISABLE_TIMING + flags set. This opaque handle may be copied into other processes and + opened with ::cuIpcOpenEventHandle to allow efficient hardware + synchronization between GPU work in different processes. - - ::CU_POINTER_ATTRIBUTE_CONTEXT - - ::CU_POINTER_ATTRIBUTE_MEMORY_TYPE - - ::CU_POINTER_ATTRIBUTE_DEVICE_POINTER - - ::CU_POINTER_ATTRIBUTE_HOST_POINTER - - ::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS - - ::CU_POINTER_ATTRIBUTE_BUFFER_ID - - ::CU_POINTER_ATTRIBUTE_IS_MANAGED + After the event has been been opened in the importing process, + ::cuEventRecord, ::cuEventSynchronize, ::cuStreamWaitEvent and + ::cuEventQuery may be used in either process. Performing operations + on the imported event after the exported event has been freed + with ::cuEventDestroy will result in undefined behavior. + + IPC functionality is restricted to devices with support for unified + addressing on Linux operating systems. - Number of attributes to query - An array of attributes to query (numAttributes and the number of attributes in this array should match) - A two-dimensional array containing pointers to memory - locations where the result of each attribute query will be written to. - Pointer to query - + Pointer to a user allocated CUipcEventHandle in which to return the opaque event handle + Event allocated with ::CU_EVENT_INTERPROCESS and ::CU_EVENT_DISABLE_TIMING flags. 
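The cuDeviceGetByPCIBusId / cuDeviceGetPCIBusId entries above describe converting between a device handle and its PCI bus ID string. A short round-trip sketch (error checking omitted):

#include <cuda.h>
#include <stdio.h>

int main(void)
{
    CUdevice dev, sameDev;
    char busId[16];                                    /* needs >= 13 chars incl. the NUL terminator */

    cuInit(0);
    cuDeviceGet(&dev, 0);
    cuDeviceGetPCIBusId(busId, (int)sizeof(busId), dev);   /* e.g. "0000:03:00.0" */
    cuDeviceGetByPCIBusId(&sameDev, busId);                 /* maps the string back to a handle */
    printf("device 0 is at %s (handle %d)\n", busId, (int)sameDev);
    return 0;
}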
+ CUDA Error Codes: , , , - + - Intra-device memcpy's done with these functions may execute in parallel with the CPU, - but if host memory is involved, they wait until the copy is done before returning. + Opens an interprocess event handle exported from another process with + ::cuIpcGetEventHandle. This function returns a ::CUevent that behaves like + a locally created event with the ::CU_EVENT_DISABLE_TIMING flag specified. + This event must be freed with ::cuEventDestroy. + + Performing operations on the imported event after the exported event has + been freed with ::cuEventDestroy will result in undefined behavior. + + IPC functionality is restricted to devices with support for unified + addressing on Linux operating systems. + Returns the imported event + Interprocess handle to open + CUDA Error Codes: , , , - + - Copies data between two pointers. - dst and src are base pointers of the destination and source, respectively. - ByteCount specifies the number of bytes to copy. - Note that this function infers the type of the transfer (host to host, host to - device, device to device, or device to host) from the pointer values. This - function is only allowed in contexts which support unified addressing. - Note that this function is synchronous. + Takes a pointer to the base of an existing device memory allocation created + with ::cuMemAlloc and exports it for use in another process. This is a + lightweight operation and may be called multiple times on an allocation + without adverse effects. + + If a region of memory is freed with ::cuMemFree and a subsequent call + to ::cuMemAlloc returns memory with the same device address, + ::cuIpcGetMemHandle will return a unique handle for the + new memory. + + IPC functionality is restricted to devices with support for unified + addressing on Linux operating systems. - Destination unified virtual address space pointer - Source unified virtual address space pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Pointer to user allocated ::CUipcMemHandle to return the handle in. + Base pointer to previously allocated device memory + CUDA Error Codes: , , , - + - Copies from device memory in one context to device memory in another - context. dstDevice is the base device pointer of the destination memory - and dstContext is the destination context. srcDevice is the base - device pointer of the source memory and srcContext is the source pointer. - ByteCount specifies the number of bytes to copy. + Maps memory exported from another process with ::cuIpcGetMemHandle into + the current device address space. For contexts on different devices + ::cuIpcOpenMemHandle can attempt to enable peer access between the + devices as if the user called ::cuCtxEnablePeerAccess. This behavior is + controlled by the ::CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS flag. + ::cuDeviceCanAccessPeer can determine if a mapping is possible. - Note that this function is asynchronous with respect to the host, but - serialized with respect all pending and future asynchronous work in to the - current context, srcContext, and dstContext (use - to avoid this synchronization). + Contexts that may open ::CUipcMemHandles are restricted in the following way. + ::CUipcMemHandles from each ::CUdevice in a given process may only be opened + by one ::CUcontext per ::CUdevice per other process. + + Memory returned from ::cuIpcOpenMemHandle must be freed with + ::cuIpcCloseMemHandle. 
+ + Calling ::cuMemFree on an exported memory region before calling + ::cuIpcCloseMemHandle in the importing context will result in undefined + behavior. + + IPC functionality is restricted to devices with support for unified + addressing on Linux operating systems. - Destination device pointer - Destination context - Source device pointer - Source context - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Returned device pointer + ::CUipcMemHandle to open + Flags for this operation. Must be specified as ::CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS + CUDA Error Codes: , , + , , - + - Perform a 3D memory copy according to the parameters specified in - pCopy. See the definition of the structure - for documentation of its parameters. - Note that this function is synchronous with respect to the host only if - the source or destination memory is of type ::CU_MEMORYTYPE_HOST. - Note also that this copy is serialized with respect all pending and future - asynchronous work in to the current context, the copy's source context, - and the copy's destination context (use to avoid - this synchronization). + Unmaps memory returnd by ::cuIpcOpenMemHandle. The original allocation + in the exporting process as well as imported mappings in other processes + will be unaffected. + + Any resources used to enable peer access will be freed if this is the + last mapping using them. + + IPC functionality is restricted to devices with support for unified + addressing on Linux operating systems. - Parameters for the memory copy - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Device pointer returned by ::cuIpcOpenMemHandle + CUDA Error Codes: , , + , - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns information about the execution affinity support of the device. + Returns in \p *pi whether execution affinity type \p type is supported by device \p dev. + The supported types are: + - ::CU_EXEC_AFFINITY_TYPE_SM_COUNT: 1 if context with limited SMs is supported by the device, + or 0 if not; - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + 1 if the execution affinity type \p type is supported by the device, or 0 if not + Execution affinity type to query + Device handle - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Combines all API calls for context management - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Creates a new CUDA context and associates it with the calling thread. The flags parameter is described in . 
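The cuIpcGetMemHandle / cuIpcOpenMemHandle / cuIpcCloseMemHandle entries above describe sharing a device allocation between processes. Because two processes are involved, the sketch below shows the two halves as separate functions; the function names are placeholders, the transport of the opaque handle (pipe, socket, shared file) is up to the application, and per the entries above this requires Linux and unified addressing.

#include <cuda.h>

/* Exporting process: allocate with cuMemAlloc, export an opaque handle, send it to the peer. */
void export_side(CUdeviceptr dptr, CUipcMemHandle *handleOut)
{
    cuIpcGetMemHandle(handleOut, dptr);            /* dptr must come from cuMemAlloc */
}

/* Importing process: open the handle, use the mapping, then close it. */
void import_side(const CUipcMemHandle *handle)
{
    CUdeviceptr mapped;
    cuIpcOpenMemHandle(&mapped, *handle, CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS);
    /* ... read/write 'mapped' from kernels or memcpys ... */
    cuIpcCloseMemHandle(mapped);                   /* the exporter's allocation is unaffected */
}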
The + context is created with a usage count of 1 and the caller of must call or + when done using the context. If a context is already current to the thread, it is supplanted by the newly created context + and may be restored by a subsequent call to . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned context handle of the new context + Context creation flags. See + Device to create context on CUDA Error Codes: , , , - , . + , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Create a CUDA context with execution affinity + Creates a new CUDA context with execution affinity and associates it with + the calling thread.The \p paramsArray and \p flags parameter are described below. + The context is created with a usage count of 1 and the caller of ::cuCtxCreate() must + call::cuCtxDestroy() or when done using the context.If a context is already + current to the thread, it is supplanted by the newly created context and may + be restored by a subsequent call to ::cuCtxPopCurrent(). + The type and the amount of execution resource the context can use is limited by \p paramsArray + and \p numParams.The \p paramsArray is an array of \p CUexecAffinityParam and the \p numParams + describes the size of the array. If two \p CUexecAffinityParam in the array have the same type, + the latter execution affinity parameter overrides the former execution affinity parameter. + + The supported execution affinity types are: + ::CU_EXEC_AFFINITY_TYPE_SM_COUNT limits the portion of SMs that the context can use.The portion + of SMs is specified as the number of SMs via \p CUexecAffinitySmCount. This limit will be internally + rounded up to the next hardware-supported amount. Hence, it is imperative to query the actual execution + affinity of the context via \p cuCtxGetExecAffinity after context creation.Currently, this attribute + is only supported under Volta+ MPS. + + Returned context handle of the new context + + + Context creation flags. See + Device to create context on + + + + + Destroys the CUDA context specified by ctx. The context ctx will be destroyed regardless of how many threads it is current to. + It is the responsibility of the calling function to ensure that no API call is issued to ctx while cuCtxDestroy_v2() is executing. + If ctx is current to the calling thread then ctx will also be + popped from the current thread's context stack (as though cuCtxPopCurrent() + were called). If ctx is current to other threads, then ctx will + remain current to those threads, and attempting to access ctx from + those threads will result in the error . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Context to destroy CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Increments the usage count of the context and passes back a context handle in pctx that must be passed to + when the application is done with the context. 
fails if there is no context current to the + thread. Currently, the flags parameter must be . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned context handle of the current context + Context attach flags (must be ) CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Decrements the usage count of the context ctx, and destroys the context if the usage count goes to 0. The context + must be a handle that was passed back by or , and must be current to the calling thread. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Context to destroy CUDA Error Codes: , , , - , . + . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Pushes the given context ctx onto the CPU thread’s stack of current contexts. The specified context becomes the + CPU thread’s current context, so all CUDA functions that operate on the current context are affected. + The previous current context may be made current again by calling or . + The context must be "floating," i.e. not attached to any thread. Contexts are made to float by calling . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Floating context to attach CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Pops the current CUDA context from the CPU thread. The CUDA context must have a usage count of 1. CUDA contexts + have a usage count of 1 upon creation; the usage count may be incremented with and decremented + with . + If successful, passes back the old context handle in pctx. That context may then be made current + to a different CPU thread by calling . + Floating contexts may be destroyed by calling . + If a context was current to the CPU thread before or was called, this function makes + that context current to the CPU thread again. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned new context handle CUDA Error Codes: , , , - , . + . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Binds the specified CUDA context to the calling CPU thread. + If ctx is NULL then the CUDA context previously bound to the + calling CPU thread is unbound and is returned. + + If there exists a CUDA context stack on the calling CPU thread, this + will replace the top of that stack with ctx. 
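The context-management entries above (cuCtxCreate, cuCtxPushCurrent, cuCtxPopCurrent, cuCtxSetCurrent) describe the per-thread context stack. A small sketch of that interplay; both contexts live on device 0 purely for illustration, and error checking is omitted.

#include <cuda.h>

int main(void)
{
    CUdevice dev; CUcontext ctxA, ctxB, current;

    cuInit(0);
    cuDeviceGet(&dev, 0);

    cuCtxCreate(&ctxA, 0, dev);        /* created AND made current to this thread */
    cuCtxCreate(&ctxB, 0, dev);        /* supplants ctxA as the current context   */

    cuCtxPopCurrent(&current);         /* current == ctxB, now floating; ctxA is current again */
    cuCtxSetCurrent(ctxB);             /* bind ctxB directly to the calling thread             */
    cuCtxGetCurrent(&current);         /* current == ctxB                                      */

    cuCtxDestroy(ctxB);
    cuCtxDestroy(ctxA);
    return 0;
}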
+ If ctx is NULL then this will be equivalent to popping the top + of the calling CPU thread's CUDA context stack (or a no-op if the + calling CPU thread's CUDA context stack is empty). - Destination device pointer - Source host pointer - Size of memory copy in bytes + Context to bind to the calling CPU thread CUDA Error Codes: , , , - , . + . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns in ctx the CUDA context bound to the calling CPU thread. + If no context is bound to the calling CPU thread then ctx is + set to NULL and is returned. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned context handle CUDA Error Codes: , , , - , . + . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns in device the ordinal of the current context’s device. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned device ID for the current context CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Blocks until the device has completed all preceding requested tasks. returns an error if one of the + preceding tasks failed. If the context was created with the flag, the CPU thread will + block until the GPU context has finished its work. - Destination device pointer - Source host pointer - Size of memory copy in bytes CUDA Error Codes: , , , - , . + . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns the API version used to create ctx in version. If ctx + is NULL, returns the API version used to create the currently bound + context. + This wil return the API version used to create a context (for example, + 3010 or 3020), which library developers can use to direct callers to a + specific API version. Note that this API version may not be the same as + returned by . - Destination device pointer - Source host pointer - Size of memory copy in bytes CUDA Error Codes: , , , - , . + , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + On devices where the L1 cache and shared memory use the same hardware + resources, this function returns through pconfig the preferred cache configuration + for the current context. This is only a preference. 
The driver will use + the requested configuration if possible, but it is free to choose a different + configuration if required to execute functions. + This will return a pconfig of on devices + where the size of the L1 cache and shared memory are fixed. - Destination device pointer - Source host pointer - Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + On devices where the L1 cache and shared memory use the same hardware + resources, this sets through config the preferred cache configuration for + the current context. This is only a preference. The driver will use + the requested configuration if possible, but it is free to choose a different + configuration if required to execute the function. Any function preference + set via will be preferred over this context-wide + setting. Setting the context-wide cache configuration to + will cause subsequent kernel launches to prefer + to not change the cache configuration unless required to launch the kernel. + This setting does nothing on devices where the size of the L1 cache and + shared memory are fixed. + Launching a kernel with a different preference than the most recent + preference setting may insert a device-side synchronization point. - Destination device pointer - Source host pointer - Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + + + Returns the current shared memory configuration for the current context. + + This function will return in \p pConfig the current size of shared memory banks + in the current context. On devices with configurable shared memory banks, + can be used to change this setting, so that all + subsequent kernel launches will by default use the new bank size. When + is called on devices without configurable shared + memory, it will return the fixed bank size of the hardware. + + The returned bank configurations can be either: + - : set shared memory bank width to + be natively four bytes. + - : set shared memory bank width to + be natively eight bytes. + + returned shared memory configuration + CUDA Error Codes: , , , + , . + + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Sets the shared memory configuration for the current context. + On devices with configurable shared memory banks, this function will set + the context's shared memory bank size which is used for subsequent kernel + launches. + Changed the shared memory configuration between launches may insert a device + side synchronization point between those launches. + Changing the shared memory bank size will not increase shared memory usage + or affect occupancy of kernels, but may have major effects on performance. + Larger bank sizes will allow for greater potential bandwidth to shared memory, + but will change what kinds of accesses to shared memory will result in bank + conflicts. + This function will do nothing on devices with fixed shared memory bank size. 
+ + The supported bank configurations are: + - : set bank width to the default initial + setting (currently, four bytes). + - : set shared memory bank width to + be natively four bytes. + - : set shared memory bank width to + be natively eight bytes. - Destination device pointer - Source host pointer - Size of memory copy in bytes + requested shared memory configuration CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , . - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns numerical values that correspond to the least and greatest stream priorities. + Returns in leastPriority and greatestPriority the numerical values that correspond + to the least and greatest stream priorities respectively. Stream priorities + follow a convention where lower numbers imply greater priorities. The range of + meaningful stream priorities is given by [greatestPriority, leastPriority]. + If the user attempts to create a stream with a priority value that is + outside the meaningful range as specified by this API, the priority is + automatically clamped down or up to either leastPriority or greatestPriority + respectively. See ::cuStreamCreateWithPriority for details on creating a + priority stream. + A NULL may be passed in for leastPriority or greatestPriority if the value + is not desired. + This function will return '0' in both leastPriority and greatestPriority if + the current context's device does not support stream priorities + (see ::cuDeviceGetAttribute). - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Pointer to an int in which the numerical value for least + stream priority is returned + Pointer to an int in which the numerical value for greatest stream priority is returned + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Resets all persisting lines in cache to normal status. + ::cuCtxResetPersistingL2Cache Resets all persisting lines in cache to normal + status.Takes effect on function return. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns the execution affinity setting for the current context. + Returns in \p *pExecAffinity the current value of \p type. The supported ::CUexecAffinityType values are: + - ::CU_EXEC_AFFINITY_TYPE_SM_COUNT: number of SMs the context is limited to use. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + + + - + - Copies from host memory to device memory. 
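The cuCtxSetSharedMemConfig and cuCtxGetStreamPriorityRange entries above are both context-wide tuning knobs. A brief sketch of using them together; the eight-byte bank preference and the non-blocking stream flag are illustrative choices.

#include <cuda.h>
#include <stdio.h>

int main(void)
{
    CUdevice dev; CUcontext ctx; CUstream stream;
    int least, greatest;

    cuInit(0);
    cuDeviceGet(&dev, 0);
    cuCtxCreate(&ctx, 0, dev);

    /* Prefer 8-byte shared memory banks for subsequent launches (a no-op on fixed-bank devices) */
    cuCtxSetSharedMemConfig(CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE);

    /* Lower numbers mean higher priority; both values are 0 if the device has no priority support */
    cuCtxGetStreamPriorityRange(&least, &greatest);
    printf("stream priorities: %d (least) .. %d (greatest)\n", least, greatest);
    cuStreamCreateWithPriority(&stream, CU_STREAM_NON_BLOCKING, greatest);

    cuStreamDestroy(stream);
    cuCtxDestroy(ctx);
    return 0;
}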
dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns the flags for the current context + Returns in \p *flags the flags of the current context. See ::cuCtxCreate for flag values. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Pointer to store flags of current context + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Retain the primary context on the GPU. + Retains the primary context on the device, creating it if necessary, + increasing its usage count. The caller must call + ::cuDevicePrimaryCtxRelease() when done using the context. + Unlike ::cuCtxCreate() the newly created context is not pushed onto the stack. + + Context creation will fail with ::CUDA_ERROR_UNKNOWN if the compute mode of + the device is ::CU_COMPUTEMODE_PROHIBITED. Similarly, context creation will + also fail with ::CUDA_ERROR_UNKNOWN if the compute mode for the device is + set to ::CU_COMPUTEMODE_EXCLUSIVE and there is already an active, non-primary, + context on the device. The function ::cuDeviceGetAttribute() can be used with + ::CU_DEVICE_ATTRIBUTE_COMPUTE_MODE to determine the compute mode of the + device. The nvidia-smi tool can be used to set the compute mode for + devices. Documentation for nvidia-smi can be obtained by passing a + -h option to it. + + Please note that the primary context always supports pinned allocations. Other + flags can be specified by ::cuDevicePrimaryCtxSetFlags(). - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Returned context handle of the new context + Device for which primary context is requested + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Release the primary context on the GPU + Releases the primary context interop on the device by decreasing the usage + count by 1. If the usage drops to 0 the primary context of device \p dev + will be destroyed regardless of how many threads it is current to. + + Please note that unlike ::cuCtxDestroy() this method does not pop the context + from stack in any circumstances. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Device which primary context is released + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Set flags for the primary context + Sets the flags for the primary context on the device overwriting perviously + set ones. If the primary context is already created + ::CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE is returned. 
+ + The three LSBs of the \p flags parameter can be used to control how the OS + thread, which owns the CUDA context at the time of an API call, interacts + with the OS scheduler when waiting for results from the GPU. Only one of + the scheduling flags can be set when creating a context. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Device for which the primary context flags are set + New flags for the device + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Get the state of the primary context + Returns in \p *flags the flags for the primary context of \p dev, and in + \p *active whether it is active. See ::cuDevicePrimaryCtxSetFlags for flag + values. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Device to get primary context flags for + Pointer to store flags + Pointer to store context state; 0 = inactive, 1 = active + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Destroy all allocations and reset all state on the primary context + + Explicitly destroys and cleans up all resources associated with the current + device in the current process. + + Note that it is responsibility of the calling function to ensure that no + other module in the process is using the device any more. For that reason + it is recommended to use ::cuDevicePrimaryCtxRelease() in most cases. + However it is safe for other modules to call ::cuDevicePrimaryCtxRelease() + even after resetting the device. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Device for which primary context is destroyed + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Combines all API calls for module management - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Takes a filename fname and loads the corresponding module module into the current context. The CUDA driver API + does not attempt to lazily allocate the resources needed by a module; if the memory for functions and data (constant + and global) needed by the module cannot be allocated, fails. The file should be a cubin file as output + by nvcc or a PTX file, either as output by nvcc or handwrtten. 
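Stepping back to the primary-context calls documented just above, a rough lifecycle sketch (the scheduling flag is only an example; error handling omitted):

    #include <cuda.h>

    void usePrimaryContext(CUdevice dev)
    {
        /* Must run before the primary context exists, otherwise
           CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE is returned. */
        cuDevicePrimaryCtxSetFlags(dev, CU_CTX_SCHED_BLOCKING_SYNC);

        CUcontext ctx;
        cuDevicePrimaryCtxRetain(&ctx, dev);  /* creates if needed, bumps usage count */
        cuCtxSetCurrent(ctx);                 /* retain does not push the context      */

        /* ... launch work ... */

        cuDevicePrimaryCtxRelease(dev);       /* context destroyed when count hits 0   */
    }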
- Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned module + Filename of module to load CUDA Error Codes: , , , - , . + , , , + , , , + . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Takes a byte[] as image and loads the corresponding module module into the current context. The byte array may be obtained + by mapping a cubin or PTX file, passing a cubin or PTX file as a null-terminated text string. + The byte[] is a replacement for the original pointer. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned module + Module data to load CUDA Error Codes: , , , - , . + , , + , , + . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Takes a byte[] as image and loads the corresponding module module into the current context. The byte array may be obtained + by mapping a cubin or PTX file, passing a cubin or PTX file as a null-terminated text string. + Options are passed as an array via options and any corresponding parameters are passed + in optionValues. The number of total options is supplied via numOptions. Any outputs will be returned via + optionValues. Supported options are definen in . + The options values are currently passed in IntPtr-type and should then be cast into their real type. This might change in future. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned module + Module data to load + Number of options + Options for JIT + Option values for JIT CUDA Error Codes: , , , - , . + , , + , , + . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Takes a byte[] as fatCubin and loads the corresponding module module into the current context. The byte[] + represents a fat binary object, which is a collection of different cubin files, all representing the same device code, but + compiled and optimized for different architectures. Prior to CUDA 4.0, there was no documented API for constructing and using + fat binary objects by programmers. Starting with CUDA 4.0, fat binary objects can be constructed by providing the -fatbin option to nvcc. + More information can be found in the nvcc document. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned module + Fat binary to load CUDA Error Codes: , , , - , . + , , , + , , + . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Unloads a module hmod from the current context. 
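As a sketch of the JIT-option mechanism described for the data-loading overload above, the following loads a PTX image and captures compiler errors in a log buffer. The ptxSource string is a placeholder; error codes beyond the log are not handled.

    #include <cuda.h>
    #include <stdio.h>

    CUmodule loadPtxWithLog(const char *ptxSource)
    {
        char errorLog[8192] = {0};
        CUjit_option options[] = { CU_JIT_ERROR_LOG_BUFFER,
                                   CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES };
        void *optionValues[]  = { errorLog,
                                  (void *)(size_t)sizeof(errorLog) };

        CUmodule module = NULL;
        CUresult rc = cuModuleLoadDataEx(&module, ptxSource, 2, options, optionValues);
        if (rc != CUDA_SUCCESS)
            fprintf(stderr, "JIT failed: %s\n", errorLog);
        return module;
    }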
- Destination device pointer - Source host pointer - Size of memory copy in bytes + Module to unload CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns in hfunc the handle of the function of name name located in module hmod. If no function of that name + exists, returns . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned function handle + Module to retrieve function from + Name of function to retrieve CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns in dptr and bytes the base pointer and size of the global of name name located in module hmod. If no + variable of that name exists, returns . Both parameters dptr + and bytes are optional. If one of them is null, it is ignored. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned global device pointer + Returned global size in bytes + Module to retrieve global from + Name of global to retrieve CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns in pTexRef the handle of the texture reference of name name in the module hmod. If no texture reference + of that name exists, returns . This texture reference handle + should not be destroyed, since it will be destroyed when the module is unloaded. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned texture reference + Module to retrieve texture reference from + Name of texture reference to retrieve CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns in pSurfRef the handle of the surface reference of name name in the module hmod. If no surface reference + of that name exists, returns . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned surface reference + Module to retrieve surface reference from + Name of surface reference to retrieve CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Creates a pending JIT linker invocation. 
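Pulling the module-management calls above together, a minimal sketch that loads a module from file, looks up a kernel and a global, and unloads it again. The file name "kernels.ptx" and the symbol names "myKernel" and "myCounter" are hypothetical; error checking omitted.

    #include <cuda.h>

    void loadAndInspect(void)
    {
        CUmodule    module;
        CUfunction  kernel;
        CUdeviceptr counterPtr;
        size_t      counterBytes;

        cuModuleLoad(&module, "kernels.ptx");              /* cubin or PTX file           */
        cuModuleGetFunction(&kernel, module, "myKernel");  /* fails if the name is absent */
        cuModuleGetGlobal(&counterPtr, &counterBytes, module, "myCounter");

        /* ... cuLaunchKernel(kernel, ...) ... */

        cuModuleUnload(module);
    }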
+ If the call is successful, the caller owns the returned CUlinkState, which should eventually be destroyed with ::cuLinkDestroy. + The device code machine size (32 or 64 bit) will match the calling application. + Both linker and compiler options may be specified. Compiler options will be applied to inputs to this linker action which must + be compiled from PTX. The options ::CU_JIT_WALL_TIME, + ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, and ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES will accumulate data until the CUlinkState is destroyed. + optionValues must remain valid for the life of the CUlinkState if output options are used. No other references to inputs are maintained after this call returns. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Size of options arrays + Array of linker and compiler options + Array of option values, each cast to void * + On success, this will contain a CUlinkState to specify and complete this action + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Add an input to a pending linker invocation. + Ownership of data data is retained by the caller. No reference is retained to any inputs after this call returns. + This method accepts only compiler options, which are used if the data must be compiled from PTX, and does not accept any of + ::CU_JIT_WALL_TIME, ::CU_JIT_INFO_LOG_BUFFER, ::CU_JIT_ERROR_LOG_BUFFER, ::CU_JIT_TARGET_FROM_CUCONTEXT, or ::CU_JIT_TARGET. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + A pending linker action. + The type of the input data. + The input data. PTX must be NULL-terminated. + The length of the input data. + An optional name for this input in log messages. + Size of options. + Options to be applied only for this input (overrides options from ::cuLinkCreate). + Array of option values, each cast to void *. + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Add a file input to a pending linker invocation. + No reference is retained to any inputs after this call returns. + This method accepts only compiler options, which are used if the data must be compiled from PTX, and does not accept any of + ::CU_JIT_WALL_TIME, ::CU_JIT_INFO_LOG_BUFFER, ::CU_JIT_ERROR_LOG_BUFFER, ::CU_JIT_TARGET_FROM_CUCONTEXT, or ::CU_JIT_TARGET. + This method is equivalent to invoking ::cuLinkAddData on the contents of the file. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + A pending linker action. + The type of the input data. + Path to the input file. + Size of options. + Options to be applied only for this input (overrides options from ::cuLinkCreate). + Array of option values, each cast to void *. + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. 
ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Complete a pending linker invocation. + Completes the pending linker action and returns the cubin image for the linked + device code, which can be used with ::cuModuleLoadData. The cubin is owned by + state, so it should be loaded before state is destroyed via ::cuLinkDestroy. + This call does not destroy state. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + A pending linker invocation + On success, this will point to the output image + Optional parameter to receive the size of the generated image + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Destroys state for a JIT linker invocation. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + State object for the linker invocation + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Combines all API calls for memory management - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns in free and total respectively, the free and total amount of memory available for allocation by the + CUDA context, in bytes. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned free memory in bytes + Returned total memory in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Allocates bytesize bytes of linear memory on the device and returns in dptr a pointer to the allocated memory. + The allocated memory is suitably aligned for any kind of variable. The memory is not cleared. If bytesize is 0, + returns . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned device pointer + Requested allocation size in bytes CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Allocates at least WidthInBytes * Height bytes of linear memory on the device and returns in dptr a pointer + to the allocated memory. 
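Before the memory-management entries continue, here is a sketch tying together the linker calls documented above: create a link state, add inputs, complete the link, load the resulting cubin, then destroy the state. The PTX file names are placeholders; error checking omitted.

    #include <cuda.h>

    CUmodule linkPtxFiles(void)
    {
        CUlinkState state;
        cuLinkCreate(0, NULL, NULL, &state);

        cuLinkAddFile(state, CU_JIT_INPUT_PTX, "a.ptx", 0, NULL, NULL);
        cuLinkAddFile(state, CU_JIT_INPUT_PTX, "b.ptx", 0, NULL, NULL);

        void  *cubin;                      /* owned by 'state'                    */
        size_t cubinSize;
        cuLinkComplete(state, &cubin, &cubinSize);

        CUmodule module;
        cuModuleLoadData(&module, cubin);  /* load before cuLinkDestroy frees it  */

        cuLinkDestroy(state);
        return module;
    }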
The function may pad the allocation to ensure that corresponding pointers in any given + row will continue to meet the alignment requirements for coalescing as the address is updated from row to row. + ElementSizeBytes specifies the size of the largest reads and writes that will be performed on the memory range. + ElementSizeBytes may be 4, 8 or 16 (since coalesced memory transactions are not possible on other data sizes). If + ElementSizeBytes is smaller than the actual read/write size of a kernel, the kernel will run correctly, but possibly + at reduced speed. The pitch returned in pPitch by is the width in bytes of the allocation. The + intended usage of pitch is as a separate parameter of the allocation, used to compute addresses within the 2D array. + Given the row and column of an array element of type T, the address is computed as: + T * pElement = (T*)((char*)BaseAddress + Row * Pitch) + Column; + The pitch returned by is guaranteed to work with under all circumstances. For + allocations of 2D arrays, it is recommended that programmers consider performing pitch allocations using . + Due to alignment restrictions in the hardware, this is especially true if the application will be performing + 2D memory copies between different regions of device memory (whether linear memory or CUDA arrays). + The byte alignment of the pitch returned by is guaranteed to match or exceed the alignment + requirement for texture binding with . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned device pointer + Returned pitch of allocation in bytes + Requested allocation width in bytes + Requested allocation height in rows + Size of largest reads/writes for range CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Frees the memory space pointed to by dptr, which must have been returned by a previous call to or + . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Pointer to memory to free CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns the base address in pbase and size in psize of the allocation by or + that contains the input pointer dptr. Both parameters pbase and psize are optional. If one of them is null, it is + ignored. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned base address + Returned size of device memory allocation + Device pointer to query CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Allocates bytesize bytes of host memory that is page-locked and accessible to the device. 
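A short sketch of the pitched-allocation pattern and the row/column addressing formula given above, assuming 4-byte elements; error checking omitted.

    #include <cuda.h>

    void allocPitched(size_t widthInBytes, size_t height)
    {
        CUdeviceptr base;
        size_t pitch;

        /* ElementSizeBytes = 4: largest reads/writes in the kernel are 4 bytes. */
        cuMemAllocPitch(&base, &pitch, widthInBytes, height, 4);

        /* Address of element (row, col) for a 4-byte element type: */
        size_t row = 3, col = 7;
        CUdeviceptr element = base + row * pitch + col * sizeof(float);
        (void)element;

        cuMemFree(base);
    }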
The driver tracks the virtual + memory ranges allocated with this function and automatically accelerates calls to functions such as . + Since the memory can be accessed directly by the device, it can be read or written with much higher bandwidth than + pageable memory obtained with functions such as malloc(). Allocating excessive amounts of memory with + may degrade system performance, since it reduces the amount of memory available to the system for paging. + As a result, this function is best used sparingly to allocate staging areas for data exchange between host and device. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned host pointer to page-locked memory + Requested allocation size in bytes CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Frees the memory space pointed to by p, which must have been returned by a previous call to . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Pointer to memory to free CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Allocates bytesize bytes of host memory that is page-locked and accessible to the device. The driver tracks the virtual + memory ranges allocated with this function and automatically accelerates calls to functions such as . + Since the memory can be accessed directly by the device, it can be read or written with much higher bandwidth than + pageable memory obtained with functions such as malloc(). Allocating excessive amounts of pinned + memory may degrade system performance, since it reduces the amount of memory available to the system for paging. + As a result, this function is best used sparingly to allocate staging areas for data exchange between host and device. + For the Flags parameter see . + The CUDA context must have been created with the flag in order for the + flag to have any effect. + The flag may be specified on CUDA contexts for devices that do not support + mapped pinned memory. The failure is deferred to because the memory may be + mapped into other CUDA contexts via the flag. + The memory allocated by this function must be freed with . + Note all host memory allocated using will automatically + be immediately accessible to all contexts on all devices which support unified + addressing (as may be queried using ::CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING). + Unless the flag ::CU_MEMHOSTALLOC_WRITECOMBINED is specified, the device pointer + that may be used to access this host memory from those contexts is always equal + to the returned host pointer pp. If the flag ::CU_MEMHOSTALLOC_WRITECOMBINED + is specified, then the function must be used + to query the device pointer, even if the context supports unified addressing. + See \ref CUDA_UNIFIED for additional details. 
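As a small illustration of the page-locked staging pattern recommended above (allocate sparingly, copy through the pinned buffer, free), assuming a destination device pointer already exists; error checking omitted.

    #include <cuda.h>
    #include <string.h>

    void stageUpload(CUdeviceptr dst, const void *src, size_t bytes)
    {
        void *pinned;
        cuMemAllocHost(&pinned, bytes);    /* page-locked, accelerates cuMemcpyHtoD */
        memcpy(pinned, src, bytes);
        cuMemcpyHtoD(dst, pinned, bytes);  /* synchronous copy from the pinned buffer */
        cuMemFreeHost(pinned);
    }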
- Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned host pointer to page-locked memory + Requested allocation size in bytes + Flags for allocation request CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Passes back the device pointer pdptr corresponding to the mapped, pinned host buffer p allocated by . + will fail if the flag was not specified at the + time the memory was allocated, or if the function is called on a GPU that does not support mapped pinned memory. + Flags provides for future releases. For now, it must be set to 0. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned device pointer + Host pointer + Options (must be 0) CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Passes back the flags pFlags that were specified when allocating the pinned host buffer p allocated by + . + will fail if the pointer does not reside in an allocation performed by or + . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned flags + Host pointer CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Page-locks the memory range specified by p and bytesize and maps it + for the device(s) as specified by Flags. This memory range also is added + to the same tracking mechanism as ::cuMemHostAlloc to automatically accelerate + calls to functions such as . Since the memory can be accessed + directly by the device, it can be read or written with much higher bandwidth + than pageable memory that has not been registered. Page-locking excessive + amounts of memory may degrade system performance, since it reduces the amount + of memory available to the system for paging. As a result, this function is + best used sparingly to register staging areas for data exchange between + host and device. + The pointer p and size bytesize must be aligned to the host page size (4 KB). + The memory page-locked by this function must be unregistered with - Destination device pointer - Source host pointer - Size of memory copy in bytes + Host pointer to memory to page-lock + Size in bytes of the address range to page-lock + Flags for allocation request CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Unmaps the memory range whose base address is specified by p, and makes it pageable again. 
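A sketch of page-locking an existing host buffer and retrieving its device pointer, as described above. It assumes the buffer is page-aligned and that the device supports mapped pinned memory; error checking omitted.

    #include <cuda.h>

    void registerBuffer(void *hostBuf, size_t bytes)  /* hostBuf must be page-aligned */
    {
        cuMemHostRegister(hostBuf, bytes, CU_MEMHOSTREGISTER_DEVICEMAP);

        CUdeviceptr devPtr;
        cuMemHostGetDevicePointer(&devPtr, hostBuf, 0);  /* Flags must be 0 */

        /* ... kernels may now read/write devPtr directly ... */

        cuMemHostUnregister(hostBuf);  /* makes the range pageable again */
    }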
+ The base address must be the same one specified to . - Destination device pointer - Source host pointer - Size of memory copy in bytes + Host pointer to memory to page-lock CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns information about a pointer - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned pointer attribute value + Pointer attribute to query + Pointer CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns information about a pointer - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned pointer attribute value + Pointer attribute to query + Pointer CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns information about a pointer - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned pointer attribute value + Pointer attribute to query + Pointer CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns information about a pointer - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned pointer attribute value + Pointer attribute to query + Pointer CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns information about a pointer - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned pointer attribute value + Pointer attribute to query + Pointer CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns information about a pointer - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned pointer attribute value + Pointer attribute to query + Pointer CUDA Error Codes: , , , - , . + , , . 
Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns information about a pointer - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned pointer attribute value + Pointer attribute to query + Pointer CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Prefetches memory to the specified destination device + Prefetches memory to the specified destination device. devPtr is the + base device pointer of the memory to be prefetched and dstDevice is the + destination device. count specifies the number of bytes to copy. hStream + is the stream in which the operation is enqueued. + + Passing in CU_DEVICE_CPU for dstDevice will prefetch the data to CPU memory. + + If no physical memory has been allocated for this region, then this memory region + will be populated and mapped on the destination device. If there's insufficient + memory to prefetch the desired region, the Unified Memory driver may evict pages + belonging to other memory regions to make room. If there's no memory that can be + evicted, then the Unified Memory driver will prefetch less than what was requested. + + In the normal case, any mappings to the previous location of the migrated pages are + removed and mappings for the new location are only setup on the dstDevice. + The application can exercise finer control on these mappings using ::cudaMemAdvise. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Pointer to be prefetched + Size in bytes + Destination device to prefetch to + Stream to enqueue prefetch operation + Note that this function is asynchronous with respect to the host and all work on other devices. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Advise about the usage of a given memory range + Advise the Unified Memory subsystem about the usage pattern for the memory range starting at devPtr with a size of count bytes. + + The \p advice parameter can take the following values: + - ::CU_MEM_ADVISE_SET_READ_MOSTLY: This implies that the data is mostly going to be read + from and only occasionally written to. This allows the driver to create read-only + copies of the data in a processor's memory when that processor accesses it. Similarly, + if cuMemPrefetchAsync is called on this region, it will create a read-only copy of + the data on the destination processor. When a processor writes to this data, all copies + of the corresponding page are invalidated except for the one where the write occurred. + The \p device argument is ignored for this advice. + - ::CU_MEM_ADVISE_UNSET_READ_MOSTLY: Undoes the effect of ::CU_MEM_ADVISE_SET_READ_MOSTLY. 
Any read + duplicated copies of the data will be freed no later than the next write access to that data. + - ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION: This advice sets the preferred location for the + data to be the memory belonging to \p device. Passing in CU_DEVICE_CPU for \p device sets the + preferred location as CPU memory. Setting the preferred location does not cause data to + migrate to that location immediately. Instead, it guides the migration policy when a fault + occurs on that memory region. If the data is already in its preferred location and the + faulting processor can establish a mapping without requiring the data to be migrated, then + the migration will be avoided. On the other hand, if the data is not in its preferred location + or if a direct mapping cannot be established, then it will be migrated to the processor accessing + it. It is important to note that setting the preferred location does not prevent data prefetching + done using ::cuMemPrefetchAsync. + Having a preferred location can override the thrash detection and resolution logic in the Unified + Memory driver. Normally, if a page is detected to be constantly thrashing between CPU and GPU + memory say, the page will eventually be pinned to CPU memory by the Unified Memory driver. But + if the preferred location is set as GPU memory, then the page will continue to thrash indefinitely. + When the Unified Memory driver has to evict pages from a certain location on account of that + memory being oversubscribed, the preferred location will be used to decide the destination to which + a page should be evicted to. + If ::CU_MEM_ADVISE_SET_READ_MOSTLY is also set on this memory region or any subset of it, the preferred + location will be ignored for that subset. + - ::CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: Undoes the effect of ::CU_MEM_ADVISE_SET_PREFERRED_LOCATION + and changes the preferred location to none. + - ::CU_MEM_ADVISE_SET_ACCESSED_BY: This advice implies that the data will be accessed by \p device. + This does not cause data migration and has no impact on the location of the data per se. Instead, + it causes the data to always be mapped in the specified processor's page tables, as long as the + location of the data permits a mapping to be established. If the data gets migrated for any reason, + the mappings are updated accordingly. + This advice is useful in scenarios where data locality is not important, but avoiding faults is. + Consider for example a system containing multiple GPUs with peer-to-peer access enabled, where the + data located on one GPU is occasionally accessed by other GPUs. In such scenarios, migrating data + over to the other GPUs is not as important because the accesses are infrequent and the overhead of + migration may be too high. But preventing faults can still help improve performance, and so having + a mapping set up in advance is useful. Note that on CPU access of this data, the data may be migrated + to CPU memory because the CPU typically cannot access GPU memory directly. Any GPU that had the + ::CU_MEM_ADVISE_SET_ACCESSED_BY flag set for this data will now have its mapping updated to point to the + page in CPU memory. + - ::CU_MEM_ADVISE_UNSET_ACCESSED_BY: Undoes the effect of CU_MEM_ADVISE_SET_ACCESSED_BY. The current set of + mappings may be removed at any time causing accesses to result in page faults. + + Passing in ::CU_DEVICE_CPU for \p device will set the advice for the CPU. 
+ + Note that this function is asynchronous with respect to the host and all work + on other devices. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Pointer to memory to set the advice for + Size in bytes of the memory range + Advice to be applied for the specified memory range + Device to apply the advice for + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Query an attribute of a given memory range - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + A pointers to a memory location where the result of each attribute query will be written to. + Array containing the size of data + The attribute to query + Start of the range to query + Size of the range to query - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Query attributes of a given memory range. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + A two-dimensional array containing pointers to memory locations where the result of each attribute query will be written to. + Array containing the sizes of each result + An array of attributes to query (numAttributes and the number of attributes in this array should match) + Number of attributes to query + Start of the range to query + Size of the range to query - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Allocates memory that will be automatically managed by the Unified Memory system + + Allocates bytesize bytes of managed memory on the device and returns in + dptr a pointer to the allocated memory. If the device doesn't support + allocating managed memory, is returned. Support + for managed memory can be queried using the device attribute + . The allocated memory is suitably + aligned for any kind of variable. The memory is not cleared. If bytesize + is 0, ::cuMemAllocManaged returns ::CUDA_ERROR_INVALID_VALUE. The pointer + is valid on the CPU and on all GPUs in the system that support managed memory. + All accesses to this pointer must obey the Unified Memory programming model. + + flags specifies the default stream association for this allocation. + flags must be one of ::CU_MEM_ATTACH_GLOBAL or ::CU_MEM_ATTACH_HOST. If + ::CU_MEM_ATTACH_GLOBAL is specified, then this memory is accessible from + any stream on any device. If ::CU_MEM_ATTACH_HOST is specified, then the + allocation is created with initial visibility restricted to host access only; + an explicit call to ::cuStreamAttachMemAsync will be required to enable access + on the device. 
+ + If the association is later changed via ::cuStreamAttachMemAsync to + a single stream, the default association as specifed during ::cuMemAllocManaged + is restored when that stream is destroyed. For __managed__ variables, the + default association is always ::CU_MEM_ATTACH_GLOBAL. Note that destroying a + stream is an asynchronous operation, and as a result, the change to default + association won't happen until all work in the stream has completed. + + Memory allocated with ::cuMemAllocManaged should be released with ::cuMemFree. + + On a multi-GPU system with peer-to-peer support, where multiple GPUs support + managed memory, the physical storage is created on the GPU which is active + at the time ::cuMemAllocManaged is called. All other GPUs will reference the + data at reduced bandwidth via peer mappings over the PCIe bus. The Unified + Memory management system does not migrate memory between GPUs. + + On a multi-GPU system where multiple GPUs support managed memory, but not + all pairs of such GPUs have peer-to-peer support between them, the physical + storage is created in 'zero-copy' or system memory. All GPUs will reference + the data at reduced bandwidth over the PCIe bus. In these circumstances, + use of the environment variable, CUDA_VISIBLE_DEVICES, is recommended to + restrict CUDA to only use those GPUs that have peer-to-peer support. This + environment variable is described in the CUDA programming guide under the + "CUDA environment variables" section. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Returned device pointer + Requested allocation size in bytes + Must be one of or CUDA Error Codes: , , , - , . + , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Set attributes on a previously allocated memory region + The supported attributes are: + : A boolean attribute that can either be set (1) or unset (0). When set, + memory operations that are synchronous. If there are some previously initiated + synchronous memory operations that are pending when this attribute is set, the + function does not return until those memory operations are complete. + See further documentation in the section titled "API synchronization behavior" + to learn more about cases when synchronous memory operations can + exhibit asynchronous behavior. + value will be considered as a pointer to an unsigned integer to which this attribute is to be set. - Destination device pointer - Source host pointer - Size of memory copy in bytes + Pointer to memory containing the value to be set + Pointer attribute to set + Pointer to a memory region allocated using CUDA memory allocation APIs CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , , . - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returns information about a pointer. 
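Combining the managed-memory calls documented above, a sketch that allocates managed memory, applies a read-mostly advice, prefetches to a device, and confirms the pointer is managed. The device and stream are assumed to exist; error checking omitted.

    #include <cuda.h>

    void managedExample(CUdevice dev, CUstream stream, size_t bytes)
    {
        CUdeviceptr p;
        cuMemAllocManaged(&p, bytes, CU_MEM_ATTACH_GLOBAL);  /* visible everywhere */

        /* Mostly-read data: permit read-only copies on accessing processors. */
        cuMemAdvise(p, bytes, CU_MEM_ADVISE_SET_READ_MOSTLY, dev);

        /* Migrate pages to 'dev' ahead of the kernels queued on 'stream'. */
        cuMemPrefetchAsync(p, bytes, dev, stream);

        int isManaged = 0;
        cuPointerGetAttribute(&isManaged, CU_POINTER_ATTRIBUTE_IS_MANAGED, p);

        cuMemFree(p);  /* managed allocations are released with cuMemFree */
    }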
+ The supported attributes are (refer to ::cuPointerGetAttribute for attribute descriptions and restrictions): + + - ::CU_POINTER_ATTRIBUTE_CONTEXT + - ::CU_POINTER_ATTRIBUTE_MEMORY_TYPE + - ::CU_POINTER_ATTRIBUTE_DEVICE_POINTER + - ::CU_POINTER_ATTRIBUTE_HOST_POINTER + - ::CU_POINTER_ATTRIBUTE_SYNC_MEMOPS + - ::CU_POINTER_ATTRIBUTE_BUFFER_ID + - ::CU_POINTER_ATTRIBUTE_IS_MANAGED - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Number of attributes to query + An array of attributes to query (numAttributes and the number of attributes in this array should match) + A two-dimensional array containing pointers to memory + locations where the result of each attribute query will be written to. + Pointer to query + - - - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + + + Allocate an address range reservation. + Reserves a virtual address range based on the given parameters, giving + the starting address of the range in \p ptr.This API requires a system that + supports UVA.The size and address parameters must be a multiple of the + host page size and the alignment must be a power of two or zero for default + alignment. + + Resulting pointer to start of virtual address range allocated + Size of the reserved virtual address range requested + Alignment of the reserved virtual address range requested + Fixed starting address range requested + Currently unused, must be zero + + + + Free an address range reservation. + Frees a virtual address range reserved by cuMemAddressReserve. The size + must match what was given to memAddressReserve and the ptr given must + match what was returned from memAddressReserve. + + Starting address of the virtual address range to free + Size of the virtual address region to free + + + + Create a shareable memory handle representing a memory allocation of a given size described by the given properties + This creates a memory allocation on the target device specified through the + \p prop strcuture.The created allocation will not have any device or host + mappings.The generic memory \p handle for the allocation can be + mapped to the address space of calling process via::cuMemMap.This handle + cannot be transmitted directly to other processes(see + ::cuMemExportToShareableHandle). On Windows, the caller must also pass + an LPSECURITYATTRIBUTE in \p prop to be associated with this handle which + limits or allows access to this handle for a recepient process (see + ::CUmemAllocationProp::win32HandleMetaData for more). The \p size of this + allocation must be a multiple of the the value given via + ::cuMemGetAllocationGranularity with the ::CU_MEM_ALLOC_GRANULARITY_MINIMUM + flag. + + Value of handle returned. All operations on this allocation are to be performed using this handle. + Size of the allocation requested + Properties of the allocation to create. + flags for future use, must be zero now. + + + + Release a memory handle representing a memory allocation which was previously allocated through cuMemCreate. + Frees the memory that was allocated on a device through cuMemCreate. 
+ + The memory allocation will be freed when all outstanding mappings to the memory + are unmapped and when all outstanding references to the handle(including it's + shareable counterparts) are also released.The generic memory handle can be + freed when there are still outstanding mappings made with this handle.Each + time a recepient process imports a shareable handle, it needs to pair it with + ::cuMemRelease for the handle to be freed.If \p handle is not a valid handle + the behavior is undefined. + + handle Value of handle which was returned previously by cuMemCreate. + + + + Maps an allocation handle to a reserved virtual address range. + Maps bytes of memory represented by \p handle starting from byte \p offset to + \p size to address range[\p addr, \p addr + \p size]. This range must be an + address reservation previously reserved with ::cuMemAddressReserve, and + \p offset + \p size must be less than the size of the memory allocation. + Both \p ptr, \p size, and \p offset must be a multiple of the value given via + ::cuMemGetAllocationGranularity with the::CU_MEM_ALLOC_GRANULARITY_MINIMUM flag. + Please note calling::cuMemMap does not make the address accessible, + the caller needs to update accessibility of a contiguous mapped VA + range by calling::cuMemSetAccess. + Once a recipient process obtains a shareable memory handle + from::cuMemImportFromShareableHandle, the process must + use ::cuMemMap to map the memory into its address ranges before + setting accessibility with::cuMemSetAccess. + ::cuMemMap can only create mappings on VA range reservations + that are not currently mapped. + + Address where memory will be mapped. + Size of the memory mapping. + Offset into the memory represented by \p handle from which to start mapping - Note: currently must be zero. + Handle to a shareable memory + flags for future use, must be zero now. + + + + Maps or unmaps subregions of sparse CUDA arrays and sparse CUDA mipmapped arrays + + List of ::CUarrayMapInfo + Count of ::CUarrayMapInfo in \p mapInfoList + Stream identifier for the stream to use for map or unmap operations + + + + + Unmap the backing memory of a given address range. + The range must be the entire contiguous address range that was mapped to. In + other words, ::cuMemUnmap cannot unmap a sub-range of an address range mapped + by::cuMemCreate / ::cuMemMap.Any backing memory allocations will be freed + if there are no existing mappings and there are no unreleased memory handles. + When::cuMemUnmap returns successfully the address range is converted to an + address reservation and can be used for a future calls to ::cuMemMap.Any new + mapping to this virtual address will need to have access granted through + ::cuMemSetAccess, as all mappings start with no accessibility setup. + + Starting address for the virtual address range to unmap + Size of the virtual address range to unmap + + + + Set the access flags for each location specified in \p desc for the given virtual address range + Given the virtual address range via \p ptr and \p size, and the locations + in the array given by \p desc and \p count, set the access flags for the + target locations.The range must be a fully mapped address range + containing all allocations created by ::cuMemMap / ::cuMemCreate. 
+ + Starting address for the virtual address range + Length of the virtual address range + Array of ::CUmemAccessDesc that describe how to change the mapping for each location specified + Number of ::CUmemAccessDesc in \p desc + + + + Get the access \p flags set for the given \p location and \p ptr + + Flags set for this location + Location in which to check the flags for + Address in which to check the access flags for + + + + Exports an allocation to a requested shareable handle type + Given a CUDA memory handle, create a shareable memory + allocation handle that can be used to share the memory with other + processes.The recipient process can convert the shareable handle back into a + CUDA memory handle using ::cuMemImportFromShareableHandle and map + it with::cuMemMap.The implementation of what this handle is and how it + can be transferred is defined by the requested handle type in \p handleType + Once all shareable handles are closed and the allocation is released, the allocated + memory referenced will be released back to the OS and uses of the CUDA handle afterward + will lead to undefined behavior. + This API can also be used in conjunction with other APIs (e.g.Vulkan, OpenGL) + that support importing memory from the shareable type - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Pointer to the location in which to store the requested handle type + CUDA handle for the memory allocation + Type of shareable handle requested (defines type and size of the \p shareableHandle output parameter) + Reserved, must be zero - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Imports an allocation from a requested shareable handle type. + If the current process cannot support the memory described by this shareable + handle, this API will error as CUDA_ERROR_NOT_SUPPORTED. + \note Importing shareable handles exported from some graphics APIs(Vulkan, OpenGL, etc) + created on devices under an SLI group may not be supported, and thus this API will + return CUDA_ERROR_NOT_SUPPORTED. + There is no guarantee that the contents of \p handle will be the same CUDA memory handle + for the same given OS shareable handle, or the same underlying allocation. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + CUDA Memory handle for the memory allocation. + Shareable Handle representing the memory allocation that is to be imported. + handle type of the exported handle ::CUmemAllocationHandleType. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Calculates either the minimal or recommended granularity + Calculates either the minimal or recommended granularity + for a given allocation specification and returns it in granularity.This + granularity can be used as a multiple for alignment, size, or address mapping. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . 
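A sketch of the full virtual memory management flow documented above: query the granularity, create physical memory, reserve a virtual address range, map it, grant access, then tear everything down. Field initialization follows the structures as I understand them; error checking omitted.

    #include <cuda.h>

    void vmmExample(CUdevice dev, size_t requested)
    {
        CUmemAllocationProp prop = {0};
        prop.type          = CU_MEM_ALLOCATION_TYPE_PINNED;
        prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
        prop.location.id   = dev;

        size_t gran = 0;
        cuMemGetAllocationGranularity(&gran, &prop, CU_MEM_ALLOC_GRANULARITY_MINIMUM);
        size_t size = ((requested + gran - 1) / gran) * gran;  /* granularity multiple */

        CUmemGenericAllocationHandle handle;
        cuMemCreate(&handle, size, &prop, 0);   /* physical backing, no mappings yet */

        CUdeviceptr va = 0;
        cuMemAddressReserve(&va, size, 0, 0, 0);  /* VA reservation only        */
        cuMemMap(va, size, 0, handle, 0);         /* mapped, not yet accessible */

        CUmemAccessDesc access = {0};
        access.location = prop.location;
        access.flags    = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
        cuMemSetAccess(va, size, &access, 1);     /* kernels may now use [va, va+size) */

        /* ... use va ... */

        cuMemUnmap(va, size);
        cuMemRelease(handle);
        cuMemAddressFree(va, size);
    }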
- Note that this function may also return error codes from previous, asynchronous launches. + granularity Returned granularity. + prop Property for which to determine the granularity for + option Determines which granularity to return - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Retrieve the contents of the property structure defining properties for this handle - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Pointer to a properties structure which will hold the information about this handle + Handle which to perform the query on + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Given an address \p addr, returns the allocation handle of the backing memory allocation. + The handle is guaranteed to be the same handle value used to map the memory. If the address + requested is not mapped, the function will fail.The returned handle must be released with + corresponding number of calls to::cuMemRelease. + + The address \p addr, can be any address in a range previously mapped + by::cuMemMap, and not necessarily the start address. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + CUDA Memory handle for the backing memory allocation. + Memory address to query, that has been mapped previously. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Frees memory with stream ordered semantics + Inserts a free operation into \p hStream. + The allocation must not be accessed after stream execution reaches the free. + After this API returns, accessing the memory from any subsequent work launched on the GPU + or querying its pointer attributes results in undefined behavior. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + memory to free + The stream establishing the stream ordering contract. + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Allocates memory with stream ordered semantics + Inserts an allocation operation into \p hStream. + A pointer to the allocated memory is returned immediately in *dptr. + The allocation must not be accessed until the the allocation operation completes. + The allocation comes from the memory pool current to the stream's device. + + note The default memory pool of a device contains device memory from that device. + note Basic stream ordering allows future work submitted into the same stream to use the allocation. 
+ Stream query, stream synchronize, and CUDA events can be used to guarantee that the allocation + operation completes before work submitted in a separate stream runs. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - - - - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Returned device pointer + Number of bytes to allocate + The stream establishing the stream ordering contract and the memory pool to allocate from + + + + + Tries to release memory back to the OS + Releases memory back to the OS until the pool contains fewer than minBytesToKeep + reserved bytes, or there is no more memory that the allocator can safely release. + The allocator cannot release OS allocations that back outstanding asynchronous allocations. + The OS allocations may happen at different granularity from the user allocations. + + note: Allocations that have not been freed count as outstanding. + note: Allocations that have been asynchronously freed but whose completion has + not been observed on the host (eg.by a synchronize) can count as outstanding. + + The memory pool to trim + If the pool has less than minBytesToKeep reserved, + the TrimTo operation is a no-op.Otherwise the pool will be guaranteed to have at least minBytesToKeep bytes reserved after the operation. + + + + + Sets attributes of a memory pool + Supported attributes are: + - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t) + Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS.When more than the release + threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the next + call to stream, event or context synchronize. (default 0) + - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to use memory asynchronously freed + in another stream as long as a stream ordering dependency + of the allocating stream on the free action exists. + Cuda events and null stream interactions can create the required + stream ordered dependencies. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int) + Allow reuse of already completed frees when there is no dependency + between the free and allocation. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to insert new stream dependencies + in order to establish the stream ordering required to reuse + a piece of memory released by::cuMemFreeAsync(default enabled). + + The memory pool to modify + The attribute to modify + Pointer to the value to assign + + + + + Sets attributes of a memory pool + Supported attributes are: + - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t) + Amount of reserved memory in bytes to hold onto before trying to release memory back to the OS.When more than the release + threshold bytes of memory are held by the memory pool, the allocator will try to release memory back to the OS on the next + call to stream, event or context synchronize. 
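The stream-ordered cuMemAllocAsync / cuMemFreeAsync pair described above is normally used as a bracket around work submitted to a single stream. A minimal C sketch against the driver API, with kernel launches elided and error handling omitted:

#include <cuda.h>

void runWithScratch(CUstream stream, size_t bytes)
{
    CUdeviceptr scratch;
    cuMemAllocAsync(&scratch, bytes, stream);  /* allocation ordered in the stream  */

    /* ... launch kernels on "stream" that read and write "scratch" ... */

    cuMemFreeAsync(scratch, stream);           /* free ordered after that work      */
    cuStreamSynchronize(stream);               /* both operations have now completed */
}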
(default 0) + - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to use memory asynchronously freed + in another stream as long as a stream ordering dependency + of the allocating stream on the free action exists. + Cuda events and null stream interactions can create the required + stream ordered dependencies. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int) + Allow reuse of already completed frees when there is no dependency + between the free and allocation. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to insert new stream dependencies + in order to establish the stream ordering required to reuse + a piece of memory released by::cuMemFreeAsync(default enabled). + + The memory pool to modify + The attribute to modify + Pointer to the value to assign + + + + + Gets attributes of a memory pool + Supported attributes are: + - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t) + Amount of reserved memory in bytes to hold onto before trying + to release memory back to the OS.When more than the release + threshold bytes of memory are held by the memory pool, the + allocator will try to release memory back to the OS on the + next call to stream, event or context synchronize. (default 0) + - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to use memory asynchronously freed + in another stream as long as a stream ordering dependency + of the allocating stream on the free action exists. + Cuda events and null stream interactions can create the required + stream ordered dependencies. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int) + Allow reuse of already completed frees when there is no dependency between the free and allocation. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to insert new stream dependencies in order to establish the stream ordering + required to reuse a piece of memory released by::cuMemFreeAsync(default enabled). + + The memory pool to get attributes of + The attribute to get + Retrieved value + + + + + Gets attributes of a memory pool + Supported attributes are: + - ::CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: (value type = cuuint64_t) + Amount of reserved memory in bytes to hold onto before trying + to release memory back to the OS.When more than the release + threshold bytes of memory are held by the memory pool, the + allocator will try to release memory back to the OS on the + next call to stream, event or context synchronize. (default 0) + - ::CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to use memory asynchronously freed + in another stream as long as a stream ordering dependency + of the allocating stream on the free action exists. + Cuda events and null stream interactions can create the required + stream ordered dependencies. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: (value type = int) + Allow reuse of already completed frees when there is no dependency between the free and allocation. (default enabled) + - ::CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: (value type = int) + Allow::cuMemAllocAsync to insert new stream dependencies in order to establish the stream ordering + required to reuse a piece of memory released by::cuMemFreeAsync(default enabled). 
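Of the pool attributes listed above, the release threshold is the one most commonly tuned: it lets the pool cache freed memory across synchronization points instead of returning it to the OS immediately. A hedged sketch using the device's default pool (the 64 MiB figure is an arbitrary example):

#include <cuda.h>

void keepSomeMemoryCached(CUdevice dev)
{
    CUmemoryPool pool;
    cuDeviceGetDefaultMemPool(&pool, dev);

    cuuint64_t threshold = 64ull * 1024 * 1024;   /* cache up to 64 MiB of freed memory */
    cuMemPoolSetAttribute(pool, CU_MEMPOOL_ATTR_RELEASE_THRESHOLD, &threshold);

    /* later, if the cached memory should be returned to the OS explicitly: */
    cuMemPoolTrimTo(pool, 0);
}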
+ + The memory pool to get attributes of + The attribute to get + Retrieved value + + + + + Controls visibility of pools between devices + + The pool being modified + Array of access descriptors. Each descriptor instructs the access to enable for a single gpu. + Number of descriptors in the map array. + + + + + Returns the accessibility of a pool from a device + Returns the accessibility of the pool's memory from the specified location. + + the accessibility of the pool from the specified location + the pool being queried + the location accessing the pool + + + + + Creates a memory pool + Creates a CUDA memory pool and returns the handle in \p pool. The \p poolProps determines + the properties of the pool such as the backing device and IPC capabilities. + By default, the pool's memory will be accessible from the device it is allocated on. + note Specifying CU_MEM_HANDLE_TYPE_NONE creates a memory pool that will not support IPC. + + + + + + + + Destroys the specified memory pool + If any pointers obtained from this pool haven't been freed or + the pool has free operations that haven't completed + when::cuMemPoolDestroy is invoked, the function will return immediately and the + resources associated with the pool will be released automatically + once there are no more outstanding allocations. + Destroying the current mempool of a device sets the default mempool of + that device as the current mempool for that device. + note A device's default memory pool cannot be destroyed. + + + + + + + Allocates memory from a specified pool with stream ordered semantics. + Inserts an allocation operation into \p hStream. + A pointer to the allocated memory is returned immediately in *dptr. + The allocation must not be accessed until the the allocation operation completes. + The allocation comes from the specified memory pool. + note + - The specified memory pool may be from a device different than that of the specified \p hStream. + - Basic stream ordering allows future work submitted into the same stream to use the allocation. + Stream query, stream synchronize, and CUDA events can be used to guarantee that the allocation + operation completes before work submitted in a separate stream runs. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Returned device pointer + Number of bytes to allocate + The pool to allocate from + The stream establishing the stream ordering semantic + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Exports a memory pool to the requested handle type. + Given an IPC capable mempool, create an OS handle to share the pool with another process. + A recipient process can convert the shareable handle into a mempool with::cuMemPoolImportFromShareableHandle. + Individual pointers can then be shared with the ::cuMemPoolExportPointer and ::cuMemPoolImportPointer APIs. + The implementation of what the shareable handle is and how it can be transferred is defined by the requested + handle type. + note: To create an IPC capable mempool, create a mempool with a CUmemAllocationHandleType other than CU_MEM_HANDLE_TYPE_NONE. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . 
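Putting the pool APIs above together: create an explicit pool on one device, grant a second device read/write access to it, and allocate from it with stream-ordered semantics. This is a sketch only; the peer device, the chosen properties, and the missing error handling are assumptions.

#include <cuda.h>
#include <string.h>

CUdeviceptr allocFromSharedPool(CUdevice owner, CUdevice peer,
                                size_t bytes, CUstream stream)
{
    CUmemPoolProps props;
    memset(&props, 0, sizeof(props));
    props.allocType     = CU_MEM_ALLOCATION_TYPE_PINNED;
    props.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    props.location.id   = owner;

    CUmemoryPool pool;
    cuMemPoolCreate(&pool, &props);

    CUmemAccessDesc access;
    memset(&access, 0, sizeof(access));
    access.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    access.location.id   = peer;
    access.flags         = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
    cuMemPoolSetAccess(pool, &access, 1);      /* make the pool visible to "peer" */

    CUdeviceptr ptr;
    cuMemAllocFromPoolAsync(&ptr, bytes, pool, stream);
    return ptr;  /* free with cuMemFreeAsync; destroy the pool once all frees have completed */
}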
- Note that this function may also return error codes from previous, asynchronous launches. + Returned OS handle + pool to export + the type of handle to create + must be 0 + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + imports a memory pool from a shared handle. + Specific allocations can be imported from the imported pool with cuMemPoolImportPointer. + note Imported memory pools do not support creating new allocations. As such imported memory pools + may not be used in cuDeviceSetMemPool or ::cuMemAllocFromPoolAsync calls. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Returned memory pool + OS handle of the pool to open + The type of handle being imported + must be 0 + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Export data to share a memory pool allocation between processes. + Constructs \p shareData_out for sharing a specific allocation from an already shared memory pool. + The recipient process can import the allocation with the::cuMemPoolImportPointer api. + The data is not a handle and may be shared through any IPC mechanism. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Returned export data + pointer to memory being exported + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Import a memory pool allocation from another process. + Returns in \p ptr_out a pointer to the imported memory. + The imported memory must not be accessed before the allocation operation completes + in the exporting process.The imported memory must be freed from all importing processes before + being freed in the exporting process.The pointer may be freed with cuMemFree + or cuMemFreeAsync.If cuMemFreeAsync is used, the free must be completed + on the importing process before the free operation on the exporting process. + note The cuMemFreeAsync api may be used in the exporting process before + the cuMemFreeAsync operation completes in its stream as long as the + cuMemFreeAsync in the exporting process specifies a stream with + a stream dependency on the importing process's cuMemFreeAsync. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + pointer to imported memory + pool from which to import + data specifying the memory to import + - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. 
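Together with the pool import and pointer export/import calls documented just below, the pool-sharing path looks roughly as follows. A sketch under assumptions: the pool was created with a POSIX-file-descriptor handle type, the descriptor and export data are transported between processes by some external means, and error handling is omitted.

#include <cuda.h>
#include <stdint.h>

void exportPoolAndPointer(CUmemoryPool pool, CUdeviceptr ptr,
                          int *poolFd, CUmemPoolPtrExportData *exportData)
{
    cuMemPoolExportToShareableHandle(poolFd, pool,
                                     CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, 0);
    cuMemPoolExportPointer(exportData, ptr);
}

CUdeviceptr importPoolAndPointer(int poolFd, CUmemPoolPtrExportData *exportData)
{
    CUmemoryPool imported;
    cuMemPoolImportFromShareableHandle(&imported, (void*)(uintptr_t)poolFd,
                                       CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, 0);

    CUdeviceptr ptr;
    cuMemPoolImportPointer(&ptr, imported, exportData);
    return ptr;  /* must be freed in every importing process before the exporter frees it */
}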
+ Intra-device memcpy's done with these functions may execute in parallel with the CPU, + but if host memory is involved, they wait until the copy is done before returning. - Destination device pointer - Source host pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies data between two pointers. + dst and src are base pointers of the destination and source, respectively. + ByteCount specifies the number of bytes to copy. + Note that this function infers the type of the transfer (host to host, host to + device, device to device, or device to host) from the pointer values. This + function is only allowed in contexts which support unified addressing. + Note that this function is synchronous. - Destination device pointer - Source host pointer + Destination unified virtual address space pointer + Source unified virtual address space pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from device memory in one context to device memory in another + context. dstDevice is the base device pointer of the destination memory + and dstContext is the destination context. srcDevice is the base + device pointer of the source memory and srcContext is the source pointer. + ByteCount specifies the number of bytes to copy. + + Note that this function is asynchronous with respect to the host, but + serialized with respect all pending and future asynchronous work in to the + current context, srcContext, and dstContext (use + to avoid this synchronization). Destination device pointer - Source host pointer + Destination context + Source device pointer + Source context Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Perform a 3D memory copy according to the parameters specified in + pCopy. See the definition of the structure + for documentation of its parameters. + Note that this function is synchronous with respect to the host only if + the source or destination memory is of type ::CU_MEMORYTYPE_HOST. + Note also that this copy is serialized with respect all pending and future + asynchronous work in to the current context, the copy's source context, + and the copy's destination context (use to avoid + this synchronization). - Destination device pointer - Source host pointer - Size of memory copy in bytes + Parameters for the memory copy CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. 
ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90580,7 +92920,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90592,7 +92932,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90604,7 +92944,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90616,7 +92956,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90628,7 +92968,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90640,7 +92980,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90652,7 +92992,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90664,7 +93004,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90676,7 +93016,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90688,7 +93028,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90700,7 +93040,7 @@ , . 
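The long run of typed overloads above and below all binds to the same two synchronous driver-API copies, cuMemcpyHtoD and cuMemcpyDtoH. At the driver level a host/device round trip looks like this minimal sketch (error handling omitted):

#include <cuda.h>
#include <stdlib.h>

void roundTrip(size_t n)
{
    float *hostIn  = (float*)malloc(n * sizeof(float));
    float *hostOut = (float*)malloc(n * sizeof(float));

    CUdeviceptr dev;
    cuMemAlloc(&dev, n * sizeof(float));

    cuMemcpyHtoD(dev, hostIn, n * sizeof(float));   /* blocks until the copy is done */
    /* ... run kernels that read and write "dev" ... */
    cuMemcpyDtoH(hostOut, dev, n * sizeof(float));  /* blocks until the copy is done */

    cuMemFree(dev);
    free(hostIn);
    free(hostOut);
}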
Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90712,7 +93052,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90724,7 +93064,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90736,7 +93076,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90748,7 +93088,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90760,7 +93100,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90772,7 +93112,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90784,7 +93124,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90796,7 +93136,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90808,7 +93148,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90820,7 +93160,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. 
- + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90832,7 +93172,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90844,7 +93184,7 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. @@ -90856,1123 +93196,1003 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - - - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - - Destination host pointer - Source device pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - - - - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - - Destination host pointer - Source device pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - - - - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - - Destination host pointer - Source device pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - - - - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - - Destination host pointer - Source device pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - - - - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - - Destination host pointer - Source device pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - - - - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - - Destination host pointer - Source device pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . 
- Note that this function may also return error codes from previous, asynchronous launches. - - - - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - - Destination host pointer - Source device pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - - - - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - - Destination host pointer - Source device pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - - - - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - - Destination host pointer - Source device pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - - - - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - - Destination host pointer - Source device pointer - Size of memory copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. 
dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. 
dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. 
dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. 
dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. 
dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. 
dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination host pointer - Source device pointer + Destination device pointer + Source host pointer Size of memory copy in bytes CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. 
[Collapsed: several thousand lines of diff to the auto-generated XML documentation for the CUDA driver-API memory-copy wrappers. The regenerated file shifts the member entries by roughly 2,200 lines and rewords each block to match its overload's direction: host-to-device entries now read "dstDevice and srcHost" with "Destination device pointer / Source host pointer" parameters, device-to-host entries keep "dstHost and srcDevice" with the pointers swapped accordingly, and the former device-to-device, device-to-array, array-to-device, and host-to-array entries are superseded at these positions by device-to-host entries. Every entry documents a synchronous copy of ByteCount bytes, lists the standard CUDA error codes, and notes that the call may also return errors from earlier asynchronous launches; only the device-to-device copy is documented as asynchronous.]
ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination array - Offset in bytes of destination array - Source host pointer + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and - starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination array - Offset in bytes of destination array - Source host pointer + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and - starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination array - Offset in bytes of destination array - Source host pointer + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and - starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination array - Offset in bytes of destination array - Source host pointer + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and - starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination array - Offset in bytes of destination array - Source host pointer + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . 
Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and - starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination array - Offset in bytes of destination array - Source host pointer + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and - starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination array - Offset in bytes of destination array - Source host pointer + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and - starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination array - Offset in bytes of destination array - Source host pointer + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and - starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination array - Offset in bytes of destination array - Source host pointer + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and - starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. 
ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination array - Offset in bytes of destination array - Source host pointer + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. 
srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. 
- Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. 
ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . 
Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. 
ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. 
srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. 
- Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. 
ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . 
Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is synchronous. - Destination device pointer - Source array - Offset in bytes of source array + Destination host pointer + Source device pointer Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. 
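        The corrected summaries describe the synchronous copy path used by the GPU volume code. Below is a minimal
        sketch of that round trip, assuming the ManagedCuda CudaDeviceVariable&lt;T&gt;.CopyToDevice(T[]) and
        CopyToHost(T[]) overloads; verify the names against the wrapper version actually referenced by
        ConvNetSharp.Volume.GPU.

        using System;
        using ManagedCuda;

        // Synchronous host -> device -> host round trip matching the corrected summaries:
        // each call blocks until the copy has completed, so no stream handling is needed.
        internal static class SyncCopyExample
        {
            internal static void Run()
            {
                using (var context = new CudaContext())                        // create/attach a CUDA context
                using (var deviceBuffer = new CudaDeviceVariable<float>(1024)) // device allocation of 1024 floats
                {
                    var host = new float[1024];
                    for (var i = 0; i < host.Length; i++) host[i] = i;

                    deviceBuffer.CopyToDevice(host);   // host -> device (synchronous)
                    var result = new float[1024];
                    deviceBuffer.CopyToHost(result);   // device -> host (synchronous)

                    Console.WriteLine(result[42]);     // 42
                }
            }
        }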
        Three overloads drop the former array-to-host wording and are re-documented as a device-to-device copy, a
        device-to-array copy, and an array-to-device copy, respectively:

+            Copies from device memory to device memory. dstDevice and srcDevice are the base pointers of the destination
+            and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is asynchronous.
+            Destination device pointer
+            Source device pointer
             Size of memory copy in bytes

+            Copies from device memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and
+            starting index of the destination data. srcDevice specifies the base pointer of the source. ByteCount specifies
+            the number of bytes to copy.
+            Destination array
+            Offset in bytes of destination array
+            Source device pointer
             Size of memory copy in bytes

+            Copies from one 1D CUDA array to device memory. dstDevice specifies the base pointer of the destination and
+            must be naturally aligned with the CUDA array elements. srcArray and srcOffset specify the CUDA array
+            handle and the offset in bytes into the array where the copy is to begin. ByteCount specifies the number of
+            bytes to copy and must be evenly divisible by the array element size.
+            Destination device pointer
             Source array
             Offset in bytes of source array
+            Size of memory copy in bytes. Must be evenly divisible by the array element size.
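        For the device-to-device copy re-documented above, here is a sketch under the assumption that
        CudaDeviceVariable&lt;T&gt; exposes a CopyToDevice overload taking another device variable as its source
        (the underlying driver call is the device-to-device copy); if the wrapper names it differently, substitute
        accordingly.

        using ManagedCuda;

        // Device-to-device copy sketch. The copy is asynchronous with respect to the host,
        // so the context (or the stream it was queued on) must be synchronized before the
        // destination is read back.
        internal static class DeviceToDeviceExample
        {
            internal static void Run()
            {
                using (var context = new CudaContext())
                using (var source = new CudaDeviceVariable<float>(256))
                using (var target = new CudaDeviceVariable<float>(256))
                {
                    source.CopyToDevice(new float[256]); // initialise source from the host (synchronous)
                    target.CopyToDevice(source);         // device -> device (assumed overload; see lead-in)
                    context.Synchronize();               // wait for the queued copy before using target
                }
            }
        }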
        The remaining overloads in this block are re-documented as host-to-array copies. The corrected summary and
        parameter descriptions are:

+            Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and
+            starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies
+            the number of bytes to copy.
+            Destination array
+            Offset in bytes of destination array
+            Source host pointer
             Size of memory copy in bytes
             CUDA Error Codes: , , , , .
             Note that this function may also return error codes from previous, asynchronous launches.

        They replace the former array-to-host wording as well as these removed summaries:

-            Copies from one 1D CUDA array to another. dstArray and srcArray specify the handles of the destination and
-            source CUDA arrays for the copy, respectively. dstOffset and srcOffset specify the destination and source
-            offsets in bytes into the CUDA arrays. ByteCount is the number of bytes to be copied. The size of the elements
-            in the CUDA arrays need not be the same format, but the elements must be the same size; and count must be
-            evenly divisible by that size.

-            Perform a 2D memory copy according to the parameters specified in pCopy. See .
-            returns an error if any pitch is greater than the maximum allowed ().
-            passes back pitches that always work with . On intra-device
-            memory copies (device to device, CUDA array to device, CUDA array to CUDA array), may fail
-            for pitches not computed by . does not have this restriction, but
-            may run significantly slower in the cases where would have returned an error code.
-            Parameters for the memory copy

-            Perform a 2D memory copy according to the parameters specified in pCopy. See .
-            Parameters for the memory copy

-            Perform a 3D memory copy according to the parameters specified in pCopy. See .
-            The srcLOD and dstLOD members of the CUDAMemCpy3D structure must be set to 0.
-            Parameters for the memory copy

        A general remark on asynchronous copies is also removed:

-            Any host memory involved must be DMA'able (e.g., allocated with cuMemAllocHost).
-            memcpy's done with these functions execute in parallel with the CPU and, if
-            the hardware is available, may execute in parallel with the GPU.
-            Asynchronous memcpy must be accompanied by appropriate stream synchronization.
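        The removed 2D-copy summary refers to a pitched copy descriptor: per-row pitches for source and destination, a
        width in bytes, and a row count. The plain C# loop below only illustrates that addressing rule; it performs no
        CUDA calls and is not the wrapper's implementation.

        using System;

        // What a pitched 2D copy does, expressed on plain byte arrays: Height rows of
        // WidthInBytes bytes each, where consecutive rows start srcPitch/dstPitch bytes apart.
        internal static class PitchedCopyIllustration
        {
            internal static void Copy2D(byte[] src, int srcPitch, byte[] dst, int dstPitch, int widthInBytes, int height)
            {
                for (var row = 0; row < height; row++)
                {
                    Buffer.BlockCopy(src, row * srcPitch, dst, row * dstPitch, widthInBytes);
                }
            }
        }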
- + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies data between two pointers. - dst and src are base pointers of the destination and source, respectively. - ByteCount specifies the number of bytes to copy. - Note that this function infers the type of the transfer (host to host, host to - device, device to device, or device to host) from the pointer values. This - function is only allowed in contexts which support unified addressing. - Note that this function is asynchronous and can optionally be associated to - a stream by passing a non-zero hStream argument + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination unified virtual address space pointer - Source unified virtual address space pointer + Destination array + Offset in bytes of destination array + Source host pointer Size of memory copy in bytes - Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device memory in one context to device memory in another - context. dstDevice is the base device pointer of the destination memory - and dstContext is the destination context. srcDevice is the base - device pointer of the source memory and srcContext is the source pointer. - ByteCount specifies the number of bytes to copy. Note that this function - is asynchronous with respect to the host and all work in other streams in - other devices. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer - Destination context - Source device pointer - Source context + Destination array + Offset in bytes of destination array + Source host pointer Size of memory copy in bytes - Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Perform a 3D memory copy according to the parameters specified in - pCopy. See the definition of the structure - for documentation of its parameters. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Parameters for the memory copy - Stream identifier + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. - is asynchronous and can optionally be associated to a stream by passing a non-zero hStream - argument. 
It only works on page-locked memory and returns an error if a pointer to pageable memory is passed as - input. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer + Destination array + Offset in bytes of destination array Source host pointer Size of memory copy in bytes - Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and - source, respectively. ByteCount specifies the number of bytes to copy. - is asynchronous and can optionally be associated to a stream by passing a non-zero - hStream argument. It only works on page-locked memory and returns an error if a pointer to pageable memory - is passed as input. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination host pointer - Source device pointer + Destination array + Offset in bytes of destination array + Source host pointer Size of memory copy in bytes - Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from device memory to device memory. dstDevice and srcDevice are the base pointers of the destination - and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is asynchronous - and can optionally be associated to a stream by passing a non-zero hStream argument. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer - Source device pointer + Destination array + Offset in bytes of destination array + Source host pointer Size of memory copy in bytes - Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and - starting offset in bytes of the destination data. srcHost specifies the base address of the source. ByteCount - specifies the number of bytes to copy. - is asynchronous and can optionally be associated to a stream by passing a non-zero - hStream argument. It only works on page-locked memory and returns an error if a pointer to pageable memory - is passed as input. + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. Destination array Offset in bytes of destination array Source host pointer Size of memory copy in bytes - Stream identifier CUDA Error Codes: , , , , . 
- Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray - and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies - the number of bytes to copy. - is asynchronous and can optionally be associated to a stream by passing a non-zero stream hStream - argument. It only works on page-locked host memory and returns an error if a pointer to pageable memory is passed - as input. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination pointer - Source array - Offset in bytes of source array + Destination array + Offset in bytes of destination array + Source host pointer Size of memory copy in bytes - Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Perform a 2D memory copy according to the parameters specified in pCopy. See . - returns an error if any pitch is greater than the maximum allowed (). - passes back pitches that always work with . On intra-device - memory copies (device ]]> device, CUDA array ]]> device, CUDA array ]]> CUDA array), may fail - for pitches not computed by . (not async!) does not have this restriction, but - may run significantly slower in the cases where would have returned an error code. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Parameters for the memory copy - Stream identifier + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Perform a 3D memory copy according to the parameters specified in pCopy. See . - returns an error if any pitch is greater than the maximum allowed (). - is asynchronous and can optionally be associated to a stream by passing a non-zero hStream - argument. It only works on page-locked host memory and returns an error if a pointer to pageable memory is passed - as input. - The srcLOD and dstLOD members of the CUDAMemCpy3D structure must be set to 0. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Parameters for the memory copy - Stream indetifier + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. - - - - Combines all memset API calls - + Note that this function may also return error codes from previous, asynchronous launches. 
- + - Sets the memory range of N 8-bit values to the specified value b. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer - Value to set - Number of elements + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Sets the memory range of N 16-bit values to the specified value us. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer - Value to set - Number of elements + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Sets the memory range of N 32-bit values to the specified value ui. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer - Value to set - Number of elements + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Sets the 2D memory range of Width 8-bit values to the specified value b. Height specifies the number of rows to - set, and dstPitch specifies the number of bytes between each row. This function performs fastest when the pitch is - one that has been passed back by . + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer - Pitch of destination device pointer - Value to set - Width of row - Number of rows + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Sets the 2D memory range of Width 16-bit values to the specified value us. Height specifies the number of rows to - set, and dstPitch specifies the number of bytes between each row. This function performs fastest when the pitch is - one that has been passed back by . + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer - Pitch of destination device pointer - Value to set - Width of row - Number of rows + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . 
Note that this function may also return error codes from previous, asynchronous launches. - + - Sets the 2D memory range of Width 32-bit values to the specified value us. Height specifies the number of rows to - set, and dstPitch specifies the number of bytes between each row. This function performs fastest when the pitch is - one that has been passed back by . + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer - Pitch of destination device pointer - Value to set - Width of row - Number of rows + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - - - Combines all async memset API calls - - - + - Sets the memory range of N 8-bit values to the specified value b. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer - Value to set - Number of elements - Stream identifier + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Sets the memory range of N 16-bit values to the specified value us. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer - Value to set - Number of elements - Stream identifier + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Sets the memory range of N 32-bit values to the specified value ui. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. - Destination device pointer - Value to set - Number of elements - Stream identifier + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Sets the 2D memory range of Width 8-bit values to the specified value b. Height specifies the number of rows to - set, and dstPitch specifies the number of bytes between each row. This function performs fastest when the pitch is - one that has been passed back by . + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. pSrc specifies the base address of the source. ByteCount specifies + the number of bytes to copy. 
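As a rough, hypothetical illustration of how the copy and fill entries condensed above are usually reached from C#: the sketch below goes through ManagedCuda's high-level wrappers (CudaContext, CudaDeviceVariable&lt;T&gt;). The class and method names reflect my understanding of that wrapper API and are assumptions, not something this diff defines.

```csharp
// Hypothetical sketch (not part of this diff): host<->device copies and a device fill
// via ManagedCuda's high-level wrappers; method names are assumptions about that API.
using System;
using ManagedCuda;

internal static class MemcpyMemsetSketch
{
    private static void Main()
    {
        using var ctx = new CudaContext(0);                 // create a context on device 0

        var host = new float[1024];
        for (var i = 0; i < host.Length; i++) host[i] = i;

        // Device allocation plus host-to-device copy (cuMemAlloc / cuMemcpyHtoD underneath).
        using var device = new CudaDeviceVariable<float>(host.Length);
        device.CopyToDevice(host);

        // 32-bit fill (cuMemsetD32 underneath); writes the raw bit pattern, here all zeros.
        device.Memset(0u);

        // Device-to-host copy back (cuMemcpyDtoH underneath).
        device.CopyToHost(host);
        Console.WriteLine(host[0]);                         // prints 0 after the fill
    }
}
```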
[XML documentation diff, condensed: the remaining asynchronous 2D memset entries, the kernel configuration and query calls (block shape, dynamic shared-memory size, function attribute get/set including the dynamic-shared-memory maximum and shared-memory carveout, cache configuration, and shared-memory bank configuration), the CUDA array and mipmapped-array management calls (create, get descriptor, destroy, get mipmap level), the texture-reference and surface-reference setters and getters (array, mipmapped-array, and linear-address binding, format, addressing and filtering modes, flags, mipmap filter mode, level bias and clamps, maximum anisotropy, border color), and the legacy kernel-parameter calls (parameter-list size plus the integer, float, and arbitrary-data setters) are likewise replaced by the regenerated host-to-1D-array and 1D-array-to-host copy summaries, the latter reading: "dstHost specifies the base pointer of the destination; srcArray and srcOffset specify the CUDA array handle and starting offset in bytes of the source data; ByteCount specifies the number of bytes to copy".]
- Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. 
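The run of entries above documents the typed overloads of the 1D array-to-host copy; each wraps the same driver-level call (cuMemcpyAtoH in the CUDA driver API). A minimal sketch of that copy in raw driver-API C rather than the managed wrapper this diff documents, with the array handle and byte count purely illustrative:

#include <cuda.h>
#include <stdio.h>

/* Illustrative only: 'array' is assumed to be a 1D CUDA array created with
   cuArrayCreate and already holding at least 'numBytes' bytes of data. */
static CUresult copyArrayToHost(CUarray array, size_t numBytes)
{
    void *hostBuf = NULL;
    CUresult rc = cuMemAllocHost(&hostBuf, numBytes);   /* page-locked host buffer */
    if (rc != CUDA_SUCCESS)
        return rc;

    /* dstHost, srcArray, srcOffset (bytes), ByteCount (bytes) */
    rc = cuMemcpyAtoH(hostBuf, array, 0, numBytes);
    if (rc == CUDA_SUCCESS)
        printf("copied %zu bytes from the CUDA array\n", numBytes);

    cuMemFreeHost(hostBuf);
    return rc;
}

The synchronous variant shown here also accepts pageable host memory; page-locked memory is only mandatory for the Async counterparts documented further down.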
- + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. 
- Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. 
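The remaining overloads in this block differ only in the element type of the destination buffer; srcOffset and ByteCount stay byte-valued in every case, so a typed copy multiplies the element count by the element size. A hypothetical driver-API C fragment (names invented) showing that arithmetic with the same call:

#include <cuda.h>

/* Illustrative: read 256 floats starting 16 floats into the array.
   Both the offset and the length passed to the copy are in bytes. */
static CUresult readFloats(CUarray array, float dstHost[256])
{
    const size_t srcOffset = 16 * sizeof(float);
    const size_t byteCount = 256 * sizeof(float);
    return cuMemcpyAtoH(dstHost, array, srcOffset, byteCount);
}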
- + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. 
- Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to another. dstArray and srcArray specify the handles of the destination and + source CUDA arrays for the copy, respectively. dstOffset and srcOffset specify the destination and source + offsets in bytes into the CUDA arrays. ByteCount is the number of bytes to be copied. The size of the elements + in the CUDA arrays need not be the same format, but the elements must be the same size; and count must be evenly + divisible by that size. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination array + Offset in bytes of destination array + Source array + Offset in bytes of source array + Size of memory copy in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Perform a 2D memory copy according to the parameters specified in pCopy. See . + returns an error if any pitch is greater than the maximum allowed (). + passes back pitches that always work with . On intra-device + memory copies (device ]]> device, CUDA array ]]> device, CUDA array ]]> CUDA array), may fail + for pitches not computed by . does not have this restriction, but + may run significantly slower in the cases where would have returned an error code. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Parameters for the memory copy CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Perform a 2D memory copy according to the parameters specified in pCopy. See . - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Parameters for the memory copy CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. 
- + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Perform a 3D memory copy according to the parameters specified in pCopy. See . + The srcLOD and dstLOD members of the CUDAMemCpy3D structure must be set to 0. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Parameters for the memory copy CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Any host memory involved must be DMA'able (e.g., allocated with cuMemAllocHost). + memcpy's done with these functions execute in parallel with the CPU and, if + the hardware is available, may execute in parallel with the GPU. + Asynchronous memcpy must be accompanied by appropriate stream synchronization. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies data between two pointers. + dst and src are base pointers of the destination and source, respectively. + ByteCount specifies the number of bytes to copy. + Note that this function infers the type of the transfer (host to host, host to + device, device to device, or device to host) from the pointer values. This + function is only allowed in contexts which support unified addressing. + Note that this function is asynchronous and can optionally be associated to + a stream by passing a non-zero hStream argument. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination unified virtual address space pointer + Source unified virtual address space pointer + Size of memory copy in bytes + Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from device memory in one context to device memory in another + context. dstDevice is the base device pointer of the destination memory + and dstContext is the destination context. srcDevice is the base + device pointer of the source memory and srcContext is the source context. + ByteCount specifies the number of bytes to copy. Note that this function + is asynchronous with respect to the host and all work in other streams in + other devices. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Destination context + Source device pointer + Source context + Size of memory copy in bytes + Stream identifier CUDA Error Codes: , , , , . 
- Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Perform a 3D memory copy according to the parameters specified in + pCopy. See the definition of the structure + for documentation of its parameters. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Parameters for the memory copy + Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from host memory to device memory. dstDevice and srcHost are the base addresses of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. + is asynchronous and can optionally be associated to a stream by passing a non-zero hStream + argument. It only works on page-locked memory and returns an error if a pointer to pageable memory is passed as + input. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source host pointer + Size of memory copy in bytes + Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from device to host memory. dstHost and srcDevice specify the base pointers of the destination and + source, respectively. ByteCount specifies the number of bytes to copy. + is asynchronous and can optionally be associated to a stream by passing a non-zero + hStream argument. It only works on page-locked memory and returns an error if a pointer to pageable memory + is passed as input. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination host pointer + Source device pointer + Size of memory copy in bytes + Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from device memory to device memory. dstDevice and srcDevice are the base pointers of the destination + and source, respectively. ByteCount specifies the number of bytes to copy. Note that this function is asynchronous + and can optionally be associated to a stream by passing a non-zero hStream argument. 
- Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Source device pointer + Size of memory copy in bytes + Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from host memory to a 1D CUDA array. dstArray and dstOffset specify the CUDA array handle and + starting offset in bytes of the destination data. srcHost specifies the base address of the source. ByteCount + specifies the number of bytes to copy. + is asynchronous and can optionally be associated to a stream by passing a non-zero + hStream argument. It only works on page-locked memory and returns an error if a pointer to pageable memory + is passed as input. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination array + Offset in bytes of destination array + Source host pointer + Size of memory copy in bytes + Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Copies from one 1D CUDA array to host memory. dstHost specifies the base pointer of the destination. srcArray + and srcOffset specify the CUDA array handle and starting offset in bytes of the source data. ByteCount specifies + the number of bytes to copy. + is asynchronous and can optionally be associated to a stream by passing a non-zero stream hStream + argument. It only works on page-locked host memory and returns an error if a pointer to pageable memory is passed + as input. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination pointer + Source array + Offset in bytes of source array + Size of memory copy in bytes + Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Perform a 2D memory copy according to the parameters specified in pCopy. See . + returns an error if any pitch is greater than the maximum allowed (). + passes back pitches that always work with . On intra-device + memory copies (device ]]> device, CUDA array ]]> device, CUDA array ]]> CUDA array), may fail + for pitches not computed by . (not async!) does not have this restriction, but + may run significantly slower in the cases where would have returned an error code. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Parameters for the memory copy + Stream identifier CUDA Error Codes: , , , , . 
Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Perform a 3D memory copy according to the parameters specified in pCopy. See . + returns an error if any pitch is greater than the maximum allowed (). + is asynchronous and can optionally be associated to a stream by passing a non-zero hStream + argument. It only works on page-locked host memory and returns an error if a pointer to pageable memory is passed + as input. + The srcLOD and dstLOD members of the CUDAMemCpy3D structure must be set to 0. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Parameters for the memory copy + Stream identifier CUDA Error Codes: , , , , . - Note that this function may also return error codes from previous, asynchronous launches. + Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Combines all memset API calls - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the memory range of N 8-bit values to the specified value b. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Value to set + Number of elements CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the memory range of N 16-bit values to the specified value us. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Value to set + Number of elements CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the memory range of N 32-bit values to the specified value ui. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Value to set + Number of elements CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the 2D memory range of Width 8-bit values to the specified value b. Height specifies the number of rows to + set, and dstPitch specifies the number of bytes between each row. 
This function performs fastest when the pitch is + one that has been passed back by . - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Pitch of destination device pointer + Value to set + Width of row + Number of rows CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the 2D memory range of Width 16-bit values to the specified value us. Height specifies the number of rows to + set, and dstPitch specifies the number of bytes between each row. This function performs fastest when the pitch is + one that has been passed back by . - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Pitch of destination device pointer + Value to set + Width of row + Number of rows CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the 2D memory range of Width 32-bit values to the specified value us. Height specifies the number of rows to + set, and dstPitch specifies the number of bytes between each row. This function performs fastest when the pitch is + one that has been passed back by . - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Pitch of destination device pointer + Value to set + Width of row + Number of rows CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Combines all async memset API calls - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the memory range of N 8-bit values to the specified value b. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Value to set + Number of elements + Stream identifier CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the memory range of N 16-bit values to the specified value us. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Value to set + Number of elements + Stream identifier CUDA Error Codes: , , , , . 
Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the memory range of N 32-bit values to the specified value ui. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Value to set + Number of elements + Stream identifier CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the 2D memory range of Width 8-bit values to the specified value b. Height specifies the number of rows to + set, and dstPitch specifies the number of bytes between each row. This function performs fastest when the pitch is + one that has been passed back by . - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Pitch of destination device pointer + Value to set + Width of row + Number of rows + Stream identifier CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the 2D memory range of Width 16-bit values to the specified value us. Height specifies the number of rows to + set, and dstPitch specifies the number of bytes between each row. This function performs fastest when the pitch is + one that has been passed back by . - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Pitch of destination device pointer + Value to set + Width of row + Number of rows + Stream identifier CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the 2D memory range of Width 32-bit values to the specified value us. Height specifies the number of rows to + set, and dstPitch specifies the number of bytes between each row. This function performs fastest when the pitch is + one that has been passed back by . - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Destination device pointer + Pitch of destination device pointer + Value to set + Width of row + Number of rows + Stream identifier CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Combines all function / kernel API calls - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. 
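The entries just above add the stream-aware memset variants; they behave like their synchronous counterparts but only enqueue the fill and return immediately. A minimal driver-API C sketch, assuming the device pointer comes from an earlier cuMemAlloc (all names illustrative):

#include <cuda.h>

/* Illustrative: clear 'numWords' 32-bit words at 'devPtr' via a stream. */
static CUresult clearBufferAsync(CUdeviceptr devPtr, size_t numWords)
{
    CUstream stream;
    CUresult rc = cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING);
    if (rc != CUDA_SUCCESS)
        return rc;

    /* Enqueue the memset; it is ordered with other work submitted to 'stream'. */
    rc = cuMemsetD32Async(devPtr, 0u, numWords, stream);
    if (rc == CUDA_SUCCESS)
        rc = cuStreamSynchronize(stream);   /* wait for the fill to complete */

    cuStreamDestroy(stream);
    return rc;
}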
- + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Specifies the x, y, and z dimensions of the thread blocks that are created when the kernel given by hfunc is launched. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Kernel to specify dimensions of + X dimension + Y dimension + Z dimension CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets through bytes the amount of dynamic shared memory that will be available to each thread block when the kernel + given by hfunc is launched. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Kernel to specify dynamic shared-memory size for + Dynamic shared-memory size per thread in bytes CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns in pi the integer value of the attribute attrib on the kernel given by hfunc. See . - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned attribute value + Attribute requested + Function to query attribute of CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets information about a function + + This call sets the value of a specified attribute \p attrib on the kernel given + by \p hfunc to an integer value specified by \p val + + This function returns CUDA_SUCCESS if the new value of the attribute could be + successfully set. If the set fails, this call will return an error. + + Not all attributes can have values set. Attempting to set a value on a read-only + attribute will result in an error (CUDA_ERROR_INVALID_VALUE) + + Supported attributes for the cuFuncSetAttribute call are: + + ::CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: This maximum size in bytes of + dynamically-allocated shared memory.The value should contain the requested + maximum size of dynamically-allocated shared memory.The sum of this value and + the function attribute::CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES cannot exceed the + device attribute ::CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN. + The maximal size of requestable dynamic shared memory may differ by GPU + architecture. + + ::CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: On devices where the L1 + cache and shared memory use the same hardware resources, this sets the shared memory + carveout preference, in percent of the total resources.This is only a hint, and the + driver can choose a different ratio if required to execute the function. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . 
- Note that this function may also return error codes from previous, asynchronous launches. + Function to query attribute of + Attribute requested + The value to set - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + On devices where the L1 cache and shared memory use the same hardware resources, this sets through config + the preferred cache configuration for the device function hfunc. This is only a preference. The driver will use the + requested configuration if possible, but it is free to choose a different configuration if required to execute hfunc. + This setting does nothing on devices where the size of the L1 cache and shared memory are fixed. + Switching between configuration modes may insert a device-side synchronization point for streamed kernel launches. + The supported cache modes are defined in - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Kernel to configure cache for + Requested cache configuration CUDA Error Codes: , , , - , . + . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the shared memory configuration for a device function. + On devices with configurable shared memory banks, this function will + force all subsequent launches of the specified device function to have + the given shared memory bank size configuration. On any given launch of the + function, the shared memory configuration of the device will be temporarily + changed if needed to suit the function's preferred configuration. Changes in + shared memory configuration between subsequent launches of functions, + may introduce a device side synchronization point. + Any per-function setting of shared memory bank size set via + will override the context wide setting set with + . + Changing the shared memory bank size will not increase shared memory usage + or affect occupancy of kernels, but may have major effects on performance. + Larger bank sizes will allow for greater potential bandwidth to shared memory, + but will change what kinds of accesses to shared memory will result in bank + conflicts. + This function will do nothing on devices with fixed shared memory bank size. + The supported bank configurations are + - : set bank width to the default initial + setting (currently, four bytes). + - : set shared memory bank width to + be natively four bytes. + - : set shared memory bank width to + be natively eight bytes. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + kernel to be given a shared memory config + requested shared memory configuration + CUDA Error Codes: , , , , + . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns a module handle + Returns in \p *hmod the handle of the module that function \p hfunc + is located in. The lifetime of the module corresponds to the lifetime of + the context it was loaded in or until the module is explicitly unloaded. 
+ The CUDA runtime manages its own modules loaded into the primary context. + If the handle returned by this API refers to a module loaded by the CUDA runtime, + calling ::cuModuleUnload() on that module will result in undefined behavior. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + + - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Combines all array management API calls - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Creates a CUDA array according to the structure pAllocateArray and returns a + handle to the new CUDA array in pHandle. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned array + Array descriptor CUDA Error Codes: , , , - , . + , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns in pArrayDescriptor a descriptor containing information on the format and dimensions of the CUDA + array hArray. It is useful for subroutines that have been passed a CUDA array, but need to know the CUDA array + parameters for validation or other purposes. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned array descriptor + Array to get descriptor of CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns the layout properties of a sparse CUDA array + Returns the layout properties of a sparse CUDA array in \p sparseProperties + If the CUDA array is not allocated with flag ::CUDA_ARRAY3D_SPARSE ::CUDA_ERROR_INVALID_VALUE will be returned. + If the returned value in ::CUDA_ARRAY_SPARSE_PROPERTIES::flags contains ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL, + then::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize represents the total size of the array.Otherwise, it will be zero. + Also, the returned value in ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel is always zero. + Note that the \p array must have been allocated using ::cuArrayCreate or::cuArray3DCreate.For CUDA arrays obtained + using ::cuMipmappedArrayGetLevel, ::CUDA_ERROR_INVALID_VALUE will be returned.Instead, ::cuMipmappedArrayGetSparseProperties + must be used to obtain the sparse properties of the entire CUDA mipmapped array to which \p array belongs to. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . 
- Note that this function may also return error codes from previous, asynchronous launches. + Pointer to ::CUDA_ARRAY_SPARSE_PROPERTIES + CUDA array to get the sparse properties of + - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns the layout properties of a sparse CUDA mipmapped array + Returns the sparse array layout properties in \p sparseProperties + If the CUDA mipmapped array is not allocated with flag ::CUDA_ARRAY3D_SPARSE + ::CUDA_ERROR_INVALID_VALUE will be returned. + For non-layered CUDA mipmapped arrays, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize returns the + size of the mip tail region.The mip tail region includes all mip levels whose width, height or depth + is less than that of the tile. + For layered CUDA mipmapped arrays, if ::CUDA_ARRAY_SPARSE_PROPERTIES::flags contains ::CU_ARRAY_SPARSE_PROPERTIES_SINGLE_MIPTAIL, + then ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies the size of the mip tail of all layers combined. + Otherwise, ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize specifies mip tail size per layer. + The returned value of::CUDA_ARRAY_SPARSE_PROPERTIES::miptailFirstLevel is valid only if ::CUDA_ARRAY_SPARSE_PROPERTIES::miptailSize is non-zero. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Pointer to ::CUDA_ARRAY_SPARSE_PROPERTIES + CUDA mipmapped array to get the sparse properties of + - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Gets a CUDA array plane from a CUDA array + Returns in \p pPlaneArray a CUDA array that represents a single format plane + of the CUDA array \p hArray. + If \p planeIdx is greater than the maximum number of planes in this array or if the array does + not have a multi-planar format e.g: ::CU_AD_FORMAT_NV12, then::CUDA_ERROR_INVALID_VALUE is returned. + Note that if the \p hArray has format ::CU_AD_FORMAT_NV12, then passing in 0 for \p planeIdx returns + a CUDA array of the same size as \p hArray but with one channel and::CU_AD_FORMAT_UNSIGNED_INT8 as its format. + If 1 is passed for \p planeIdx, then the returned CUDA array has half the height and width + of \p hArray with two channels and ::CU_AD_FORMAT_UNSIGNED_INT8 as its format. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Returned CUDA array referenced by the planeIdx + Multiplanar CUDA array + Plane index + - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Destroys the CUDA array hArray. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Array to destroy CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. 
+ Creates a CUDA array according to the structure pAllocateArray and returns + a handle to the new CUDA array in pHandle. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned array + 3D array descriptor CUDA Error Codes: , , , - , . + , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns in pArrayDescriptor a descriptor containing information on the format and dimensions of the CUDA + array hArray. It is useful for subroutines that have been passed a CUDA array, but need to know the CUDA array + parameters for validation or other purposes. + This function may be called on 1D and 2D arrays, in which case the Height and/or Depth members of the descriptor + struct will be set to 0. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned 3D array descriptor + 3D array to get descriptor of CUDA Error Codes: , , , - , . + , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Creates a CUDA mipmapped array according to the ::CUDA_ARRAY3D_DESCRIPTOR structure + pMipmappedArrayDesc and returns a handle to the new CUDA mipmapped array in pHandle. + numMipmapLevels specifies the number of mipmap levels to be allocated. This value is + clamped to the range [1, 1 + floor(log2(max(width, height, depth)))]. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned mipmapped array + mipmapped array descriptor + Number of mipmap levels CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , , , . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns in pLevelArray a CUDA array that represents a single mipmap level + of the CUDA mipmapped array hMipmappedArray. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned mipmap level CUDA array + CUDA mipmapped array + Mipmap level CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , , . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Destroys the CUDA mipmapped array hMipmappedArray. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Mipmapped array to destroy CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , , . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. 
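Aside on the array-management entries above: a minimal raw driver-API sketch (plain C, not the managed wrapper documented here) of creating a 2D CUDA array, reading its descriptor back, and destroying it. The width, height and format are arbitrary illustration values; error checking is omitted.

#include <cuda.h>
#include <string.h>

/* Sketch only: assumes cuInit() was called and a context is current. */
static void array_roundtrip(void)
{
    CUDA_ARRAY_DESCRIPTOR desc;
    memset(&desc, 0, sizeof(desc));
    desc.Width       = 1024;               /* texels */
    desc.Height      = 768;                /* 0 would mean a 1D array */
    desc.Format      = CU_AD_FORMAT_FLOAT;
    desc.NumChannels = 1;

    CUarray arr;
    cuArrayCreate(&arr, &desc);            /* handle returned in arr */

    CUDA_ARRAY_DESCRIPTOR queried;
    cuArrayGetDescriptor(&queried, arr);   /* format/dimensions round-trip */

    cuArrayDestroy(arr);
}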
+ Groups all texture reference management API calls - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Creates a texture reference and returns its handle in pTexRef. Once created, the application must call + or to associate the reference with allocated memory. Other texture reference functions + are used to specify the format and interpretation (addressing, filtering, etc.) to be used when the memory is read + through this texture reference. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned texture reference CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Destroys the texture reference specified by hTexRef. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference to destroy CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Binds the CUDA array hArray to the texture reference hTexRef. Any previous address or CUDA array state + associated with the texture reference is superseded by this function. Flags must be set to + . Any CUDA array previously bound to hTexRef is unbound. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference to bind + Array to bind + Options (must be ) CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Binds the CUDA mipmapped array hMipmappedArray to the texture reference hTexRef. + Any previous address or CUDA array state associated with the texture reference + is superseded by this function. Flags must be set to . + Any CUDA array previously bound to hTexRef is unbound. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference to bind + Mipmapped array to bind + Options (must be ) CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Binds a linear address range to the texture reference hTexRef. Any previous address or CUDA array state associated + with the texture reference is superseded by this function. Any memory previously bound to hTexRef is unbound. 
+ Since the hardware enforces an alignment requirement on texture base addresses, passes back + a byte offset in ByteOffset that must be applied to texture fetches in order to read from the desired memory. This + offset must be divided by the texel size and passed to kernels that read from the texture so they can be applied to the + tex1Dfetch() function. + If the device memory pointer was returned from , the offset is guaranteed to be 0 and null may be + passed as the ByteOffset parameter. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned byte offset + Texture reference to bind + Device pointer to bind + Size of memory to bind in bytes CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Binds a linear address range to the texture reference hTexRef. Any previous address or CUDA array state associated + with the texture reference is superseded by this function. Any memory previously bound to hTexRef is unbound. + Using a tex2D() function inside a kernel requires a call to either to bind the corresponding texture + reference to an array, or to bind the texture reference to linear memory. + Function calls to cannot follow calls to for the same texture reference. + It is required that dptr be aligned to the appropriate hardware-specific texture alignment. You can query this value + using the device attribute . If an unaligned dptr is supplied, + is returned. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference to bind + Descriptor of CUDA array + Device pointer to bind + Line pitch in bytes> CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Specifies the format of the data to be read by the texture reference hTexRef. fmt and NumPackedComponents + are exactly analogous to the Format and NumChannels members of the structure: + They specify the format of each component and the number of components per array element. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference + Format to set + Number of components per array element CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Specifies the addressing mode am for the given dimension dim of the texture reference hTexRef. If dim is zero, + the addressing mode is applied to the first parameter of the functions used to fetch from the texture; if dim is 1, the + second, and so on. See . + Note that this call has no effect if hTexRef is bound to linear memory. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference + Dimension + Addressing mode to set CUDA Error Codes: , , , , . 
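Aside on the texture-reference setters above: a sketch of the legacy (pre-texture-object) configuration path in plain driver-API C. The module handle, the CUDA array, and the texref name "tex" are assumed to exist and are placeholders for whatever the loaded .cu module actually declares.

#include <cuda.h>

/* Sketch: configure a legacy texture reference before launching kernels that
   read through it.  Error checking omitted. */
static void bind_texref(CUmodule module, CUarray cuArray)
{
    CUtexref texRef;
    cuModuleGetTexRef(&texRef, module, "tex");

    cuTexRefSetArray(texRef, cuArray, CU_TRSA_OVERRIDE_FORMAT);
    cuTexRefSetFormat(texRef, CU_AD_FORMAT_FLOAT, 1);
    cuTexRefSetAddressMode(texRef, 0, CU_TR_ADDRESS_MODE_CLAMP);  /* dim 0 = x */
    cuTexRefSetAddressMode(texRef, 1, CU_TR_ADDRESS_MODE_CLAMP);  /* dim 1 = y */
    cuTexRefSetFilterMode(texRef, CU_TR_FILTER_MODE_LINEAR);
    cuTexRefSetFlags(texRef, CU_TRSF_NORMALIZED_COORDINATES);
}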
Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Specifies the filtering mode fm to be used when reading memory through the texture reference hTexRef. See . + Note that this call has no effect if hTexRef is bound to linear memory. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference + Filtering mode to set CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Specifies optional flags via Flags to specify the behavior of data returned through the texture reference hTexRef. See . - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference + Optional flags to set CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns in pdptr the base address bound to the texture reference hTexRef, or returns + if the texture reference is not bound to any device memory range. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned device address + Texture reference CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns in phArray the CUDA array bound to the texture reference hTexRef, or returns + if the texture reference is not bound to any CUDA array. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned array + Texture reference CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns in phMipmappedArray the CUDA mipmapped array bound to the texture + reference hTexRef, or returns if the texture reference + is not bound to any CUDA mipmapped array. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned mipmapped array + Texture reference CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns in pam the addressing mode corresponding to the dimension dim of the texture reference hTexRef. Currently, + the only valid value for dim are 0 and 1. 
- Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned addressing mode + Texture reference + Dimension CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns in pfm the filtering mode of the texture reference hTexRef. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned filtering mode + Texture reference CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns in pFormat and pNumChannels the format and number of components of the CUDA array bound to + the texture reference hTexRef. If pFormat or pNumChannels is null, it will be ignored. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned format + Returned number of components + Texture reference CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns in pFlags the flags of the texture reference hTexRef. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned flags + Texture reference CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns the mipmap filtering mode in pfm that's used when reading memory through + the texture reference hTexRef. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Returned mipmap filtering mode + Texture reference + - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns the mipmap level bias in pBias that's added to the specified mipmap + level when reading memory through the texture reference hTexRef. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned mipmap level bias + Texture reference CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns the min/max mipmap level clamps in pminMipmapLevelClamp and pmaxMipmapLevelClamp + that's used when reading memory through the texture reference hTexRef. 
- Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned mipmap min level clamp + Returned mipmap max level clamp + Texture reference CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Returns the maximum aniostropy in pmaxAniso that's used when reading memory through + the texture reference. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Returned maximum anisotropy + Texture reference CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Specifies the mipmap filtering mode fm to be used when reading memory through + the texture reference hTexRef. + Note that this call has no effect if hTexRef is not bound to a mipmapped array. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference + Filtering mode to set CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Specifies the mipmap level bias bias to be added to the specified mipmap level when + reading memory through the texture reference hTexRef. + Note that this call has no effect if hTexRef is not bound to a mipmapped array. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference + Mipmap level bias CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Specifies the min/max mipmap level clamps, minMipmapLevelClamp and maxMipmapLevelClamp + respectively, to be used when reading memory through the texture reference + hTexRef. + Note that this call has no effect if hTexRef is not bound to a mipmapped array. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference + Mipmap min level clamp + Mipmap max level clamp CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + , . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Specifies the maximum aniostropy maxAniso to be used when reading memory through + the texture reference hTexRef. + Note that this call has no effect if hTexRef is not bound to a mipmapped array. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Texture reference + Maximum anisotropy CUDA Error Codes: , , , - , . 
- Note that this function may also return error codes from previous, asynchronous launches. + , . - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the border color for a texture reference + Specifies the value of the RGBA color via the \p pBorderColor to the texture reference + \p hTexRef. The color value supports only float type and holds color components in + the following sequence: + pBorderColor[0] holds 'R' component + pBorderColor[1] holds 'G' component + pBorderColor[2] holds 'B' component + pBorderColor[3] holds 'A' component + + Note that the color values can be set only when the Address mode is set to + CU_TR_ADDRESS_MODE_BORDER using ::cuTexRefSetAddressMode. + Applications using integer border color values have to "reinterpret_cast" their values to float. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Texture reference + RGBA color - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Gets the border color used by a texture reference + Returns in \p pBorderColor, values of the RGBA color used by + the texture reference \p hTexRef. + The color value is of type float and holds color components in + the following sequence: + pBorderColor[0] holds 'R' component + pBorderColor[1] holds 'G' component + pBorderColor[2] holds 'B' component + pBorderColor[3] holds 'A' component - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. + Returned Type and Value of RGBA color + Texture reference + - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Combines all surface management API calls - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - + - Copies an arbitrary amount of data (specified in numbytes) from ptr into the parameter space of the kernel corresponding - to hfunc. offset is a byte offset. + Sets the CUDA array hArray to be read and written by the surface reference hSurfRef. Any previous CUDA array + state associated with the surface reference is superseded by this function. Flags must be set to . The + flag must have been set for the CUDA array. Any CUDA array previously bound to + hSurfRef is unbound. - Kernel to add data to - Offset to add data to argument list - Pointer to arbitrary data - Size of data to copy in bytes + Surface reference handle + CUDA array handle + set to CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. - + - Makes the CUDA array or linear memory bound to the texture reference hTexRef available to a device program as a - texture. In this version of CUDA, the texture-reference must be obtained via and the texunit - parameter must be set to . 
+ Returns in phArray the CUDA array bound to the surface reference hSurfRef, or returns + if the surface reference is not bound to any CUDA array. - Kernel to add texture-reference to - Texture unit (must be ) - Texture-reference to add to argument list + Surface reference handle + Surface reference handle CUDA Error Codes: , , , , . Note that this function may also return error codes from previous, asynchronous launches. @@ -96410,6 +98365,28 @@ , , . Note that this function may also return error codes from previous, asynchronous launches. + + + Records an event + Captures in \p hEvent the contents of \p hStream at the time of this call. + \p hEvent and \p hStream must be from the same context. + Calls such as ::cuEventQuery() or ::cuStreamWaitEvent() will then + examine or wait for completion of the work that was captured.Uses of + \p hStream after this call do not modify \p hEvent. See note on default + stream behavior for what is captured in the default case. + ::cuEventRecordWithFlags() can be called multiple times on the same event and + will overwrite the previously captured state.Other APIs such as + ::cuStreamWaitEvent() use the most recently captured state at the time + of the API call, and are not affected by later calls to + ::cuEventRecordWithFlags(). Before the first call to::cuEventRecordWithFlags(), an + event represents an empty set of work, so for example::cuEventQuery() + would return ::CUDA_SUCCESS. + + Event to record + Stream to record event for + See ::CUevent_capture_flags + + Returns if the event has actually been recorded, or if not. If @@ -96432,20 +98409,11 @@ , , . Note that this function may also return error codes from previous, asynchronous launches. - - - Destroys the event specified by event. - - Event to destroy - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - Destroys the event specified by event. In the case that hEvent has been recorded but has not yet been completed - when is called, the function will return immediately and + when is called, the function will return immediately and the resources associated with hEvent will be released automatically once the device has completed hEvent. @@ -96620,20 +98588,11 @@ , . Note that this function may also return error codes from previous, asynchronous launches. - - - Destroys the stream specified by hStream. - - Stream to destroy - CUDA Error Codes: , , , - , . - Note that this function may also return error codes from previous, asynchronous launches. - Destroys the stream specified by hStream. In the case that the device is still doing work in the stream hStream - when is called, the function will return immediately + when is called, the function will return immediately and the resources associated with hStream will be released automatically once the device has completed all work in hStream. @@ -96642,6 +98601,36 @@ , . Note that this function may also return error codes from previous, asynchronous launches. + + + Copies attributes from source stream to destination stream + Copies attributes from source stream \p src to destination stream \p dst. + Both streams must have the same context. + + Destination stream + Source stream + + + + Queries stream attribute. + Queries attribute \p attr from \p hStream and stores it in corresponding member of \p value_out. + + + + + + + + Sets stream attribute. 
+ Sets attribute \p attr on \p hStream from corresponding attribute of + value.The updated attribute will be applied to subsequent work + submitted to the stream. It will not affect previously submitted work. + + + + + + Make a compute stream wait on an event @@ -96651,7 +98640,7 @@ The stream hStream will wait only for the completion of the most recent host call to on hEvent. Once this call has returned, - any functions (including and may be + any functions (including and may be called on hEvent again, and the subsequent calls will not have any effect on hStream. @@ -96813,7 +98802,7 @@ Must be one of - + Begins graph capture on a stream Begin graph capture on \p hStream. When a stream is in capture mode, all operations @@ -96824,6 +98813,7 @@ mode.The capture mode may be queried via ::cuStreamIsCapturing. Stream in which to initiate capture + Controls the interaction of this capture sequence with other API calls that are potentially unsafe. For more details see ::cuThreadExchangeStreamCaptureMode. Kernels captured using this API must not use texture and surface references. Reading or writing through any texture or surface reference is undefined behavior.This restriction does not apply to texture and surface objects. @@ -96863,6 +98853,142 @@ Stream to query Returns the stream's capture status + + + Swaps the stream capture interaction mode for a thread + Sets the calling thread's stream capture interaction mode to the value contained + in \p* mode, and overwrites \p* mode with the previous mode for the thread.To + facilitate deterministic behavior across function or module boundaries, callers + are encouraged to use this API in a push-pop fashion: \code + CUstreamCaptureMode mode = desiredMode; + cuThreadExchangeStreamCaptureMode(&mode); + ... + cuThreadExchangeStreamCaptureMode(&mode); // restore previous mode + \endcode + During stream capture(see::cuStreamBeginCapture), some actions, such as a call + to::cudaMalloc, may be unsafe. In the case of::cudaMalloc, the operation is + not enqueued asynchronously to a stream, and is not observed by stream capture. + Therefore, if the sequence of operations captured via ::cuStreamBeginCapture + depended on the allocation being replayed whenever the graph is launched, the + captured graph would be invalid. + Therefore, stream capture places restrictions on API calls that can be made within + or concurrently to a ::cuStreamBeginCapture-::cuStreamEndCapture sequence. This + behavior can be controlled via this API and flags to ::cuStreamBeginCapture. + A thread's mode is one of the following: + - \p CU_STREAM_CAPTURE_MODE_GLOBAL: This is the default mode.If the local thread has + an ongoing capture sequence that was not initiated with + \p CU_STREAM_CAPTURE_MODE_RELAXED at \p cuStreamBeginCapture, or if any other thread + has a concurrent capture sequence initiated with \p CU_STREAM_CAPTURE_MODE_GLOBAL, + this thread is prohibited from potentially unsafe API calls. + - \p CU_STREAM_CAPTURE_MODE_THREAD_LOCAL: If the local thread has an ongoing capture + sequence not initiated with \p CU_STREAM_CAPTURE_MODE_RELAXED, it is prohibited + from potentially unsafe API calls.Concurrent capture sequences in other threads + are ignored. + - \p CU_STREAM_CAPTURE_MODE_RELAXED: The local thread is not prohibited from potentially + unsafe API calls.Note that the thread is still prohibited from API calls which + necessarily conflict with stream capture, for example, attempting::cuEventQuery + on an event that was last recorded inside a capture sequence. 
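Aside on stream capture: a sketch, in raw driver-API C, of capturing asynchronously submitted work on a stream into a graph. The work enqueued in the middle is a stand-in; `stream` is assumed to be an existing non-default stream.

#include <cuda.h>
#include <stddef.h>

/* Sketch: capture work submitted to `stream` into a CUgraph.  Error checking omitted. */
static CUgraph capture_stream(CUstream stream)
{
    cuStreamBeginCapture(stream, CU_STREAM_CAPTURE_MODE_GLOBAL);

    /* ... enqueue kernels / cuMemcpyAsync / event records on `stream` here ... */

    CUstreamCaptureStatus status;
    cuStreamIsCapturing(stream, &status);   /* expect CU_STREAM_CAPTURE_STATUS_ACTIVE */

    CUgraph graph = NULL;
    cuStreamEndCapture(stream, &graph);     /* ends capture and returns the graph */
    return graph;
}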
+ + + + + + Query capture status of a stream + Query the capture status of a stream and and get an id for + the capture sequence, which is unique over the lifetime of the process. + If called on::CU_STREAM_LEGACY(the "null stream") while a stream not created + with::CU_STREAM_NON_BLOCKING is capturing, returns::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT. + A valid id is returned only if both of the following are true: + - the call returns CUDA_SUCCESS + - captureStatus is set to ::CU_STREAM_CAPTURE_STATUS_ACTIVE + + + + + + + + Query a stream's capture state (11.3+) + Query stream state related to stream capture. + + If called on ::CU_STREAM_LEGACY(the "null stream") while a stream not created + with::CU_STREAM_NON_BLOCKING is capturing, returns::CUDA_ERROR_STREAM_CAPTURE_IMPLICIT. + + Valid data(other than capture status) is returned only if both of the following are true: + - the call returns CUDA_SUCCESS + - the returned capture status is ::CU_STREAM_CAPTURE_STATUS_ACTIVE + + This version of cuStreamGetCaptureInfo is introduced in CUDA 11.3 and will supplant the + previous version in 12.0. Developers requiring compatibility across minor versions to + CUDA 11.0 (driver version 445) should use ::cuStreamGetCaptureInfo or include a fallback + path. + + The stream to query + captureStatus_out - Location to return the capture status of the stream; required + Optional location to return an id for the capture sequence, which is unique over the lifetime of the process + Optional location to return the graph being captured into. All operations other than destroy and node removal are permitted on the graph + while the capture sequence is in progress.This API does not transfer + ownership of the graph, which is transferred or destroyed at + ::cuStreamEndCapture.Note that the graph handle may be invalidated before + end of capture for certain errors.Nodes that are or become + unreachable from the original stream at ::cuStreamEndCapture due to direct + actions on the graph do not trigger ::CUDA_ERROR_STREAM_CAPTURE_UNJOINED. + Optional location to store a pointer to an array of nodes. The next node to be captured in the stream will depend on this set of nodes, + absent operations such as event wait which modify this set.The array pointer + is valid until the next API call which operates on the stream or until end of + capture. The node handles may be copied out and are valid until they or the + graph is destroyed.The driver-owned array may also be passed directly to + APIs that operate on the graph (not the stream) without copying. + Optional location to store the size of the array returned in dependencies_out. + + + \brief Update the set of dependencies in a capturing stream (11.3+) + + Modifies the dependency set of a capturing stream. The dependency set is the set + of nodes that the next captured node in the stream will depend on. + + Valid flags are ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES and + ::CU_STREAM_SET_CAPTURE_DEPENDENCIES. These control whether the set passed to + the API is added to the existing set or replaces it. A flags value of 0 defaults + to ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES. + + Nodes that are removed from the dependency set via this API do not result in + ::CUDA_ERROR_STREAM_CAPTURE_UNJOINED if they are unreachable from the stream at + ::cuStreamEndCapture. + + Returns ::CUDA_ERROR_ILLEGAL_STATE if the stream is not capturing. + + This API is new in CUDA 11.3. Developers requiring compatibility across minor + versions to CUDA 11.0 should not use this API or provide a fallback. 
+ + \return + ::CUDA_SUCCESS, + ::CUDA_ERROR_INVALID_VALUE, + ::CUDA_ERROR_ILLEGAL_STATE + + \sa + ::cuStreamBeginCapture, + ::cuStreamGetCaptureInfo, + ::cuStreamGetCaptureInfo_v2 + + Update the set of dependencies in a capturing stream (11.3+) + Modifies the dependency set of a capturing stream. The dependency set is the set of nodes that the next captured node in the stream will depend on. + Valid flags are ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES and + ::CU_STREAM_SET_CAPTURE_DEPENDENCIES.These control whether the set passed to + the API is added to the existing set or replaces it.A flags value of 0 defaults + to ::CU_STREAM_ADD_CAPTURE_DEPENDENCIES. + Nodes that are removed from the dependency set via this API do not result in + ::CUDA_ERROR_STREAM_CAPTURE_UNJOINED if they are unreachable from the stream at + ::cuStreamEndCapture. + Returns ::CUDA_ERROR_ILLEGAL_STATE if the stream is not capturing. + This API is new in CUDA 11.3. Developers requiring compatibility across minor + versions to CUDA 11.0 should not use this API or provide a fallback. + + + + + + Combines all graphics interop API calls @@ -97481,12 +99607,22 @@ Options + + + Returns dynamic shared memory available per block when launching \p numBlocks blocks on SM + Returns in \p *dynamicSmemSize the maximum size of dynamic shared memory to allow \p numBlocks blocks per SM. + + Returned maximum dynamic shared memory + Kernel function for which occupancy is calculated + Number of blocks to fit on SM + Size of the blocks + - + Imports an external memory object Imports an externally allocated memory object and returns a handle to that in \p extMem_out. @@ -97495,7 +99631,7 @@ Memory import handle descriptor - + Maps a buffer onto an imported memory object Maps a buffer onto an imported memory object and returns a device pointer in \p devPtr. @@ -97504,7 +99640,7 @@ Handle to external memory object Buffer descriptor - + Maps a CUDA mipmapped array onto an external memory object Maps a CUDA mipmapped array onto an external object and returns a handle to it in \p mipmap. @@ -97522,7 +99658,7 @@ External memory object to be destroyed - + Imports an external semaphore Imports an externally allocated synchronization object and returns a handle to that in \p extSem_out. @@ -97532,7 +99668,7 @@ Semaphore import handle descriptor - + Signals a set of external semaphore objects Enqueues a signal operation on a set of externally allocated @@ -97547,7 +99683,7 @@ Number of semaphores to signal Stream to enqueue the signal operations in - + Waits on a set of external semaphore objects Enqueues a wait operation on a set of externally allocated @@ -97583,7 +99719,7 @@ Returns newly created graph Graph creation flags, must be 0 - + Creates a kernel execution node and adds it to a graph Creates a new kernel execution node and adds it to \p hGraph with \p numDependencies @@ -97598,7 +99734,7 @@ Number of dependencies Parameters for the GPU execution node - + Returns a kernel node's parameters Returns the parameters of kernel node \p hNode in \p nodeParams. @@ -97614,7 +99750,7 @@ Node to get the parameters for Pointer to return the parameters - + Sets a kernel node's parameters Sets the parameters of kernel node \p hNode to \p nodeParams. 
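Aside on explicit graph construction: a sketch of adding a kernel node to a graph with the raw driver API. `kernelFunc` is assumed to be a CUfunction obtained elsewhere (for example via cuModuleGetFunction), and `args` points at the kernel's parameter values exactly as it would for cuLaunchKernel; the grid/block sizes are illustration values.

#include <cuda.h>
#include <string.h>

/* Sketch: add a root kernel node (no dependencies) to `graph`.  Error checking omitted. */
static CUgraphNode add_kernel_node(CUgraph graph, CUfunction kernelFunc, void **args)
{
    CUDA_KERNEL_NODE_PARAMS params;
    memset(&params, 0, sizeof(params));
    params.func           = kernelFunc;
    params.gridDimX       = 256;  params.gridDimY  = 1;  params.gridDimZ  = 1;
    params.blockDimX      = 128;  params.blockDimY = 1;  params.blockDimZ = 1;
    params.sharedMemBytes = 0;
    params.kernelParams   = args;

    CUgraphNode node;
    cuGraphAddKernelNode(&node, graph, NULL, 0, &params);
    return node;
}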
@@ -97662,7 +99798,7 @@ Node to set the parameters for Parameters to copy - + Creates a memset node and adds it to a graph Creates a new memset node and adds it to \p hGraph with \p numDependencies @@ -97680,7 +99816,7 @@ Parameters for the memory set Context on which to run the node - + Returns a memset node's parameters Returns the parameters of memset node \p hNode in \p nodeParams. @@ -97688,7 +99824,7 @@ Node to get the parameters for Pointer to return the parameters - + Sets a memset node's parameters Sets the parameters of memset node \p hNode to \p nodeParams. @@ -97696,7 +99832,7 @@ Node to set the parameters for Parameters to copy - + Creates a host execution node and adds it to a graph Creates a new CPU execution node and adds it to \p hGraph with \p numDependencies @@ -97712,7 +99848,7 @@ Number of dependencies Parameters for the host node - + Returns a host node's parameters Returns the parameters of host node \p hNode in \p nodeParams. @@ -97720,7 +99856,7 @@ Node to get the parameters for Pointer to return the parameters - + Sets a host node's parameters Sets the parameters of host node \p hNode to \p nodeParams. @@ -97750,6 +99886,7 @@ Gets a handle to the embedded graph in a child graph node. This call does not clone the graph. Changes to the graph will be reflected in the node, and the node retains ownership of the graph. + Allocation and free nodes cannot be added to the returned graph. Attempting to do so will return an error. Node to get the embedded graph for Location to store a handle to the graph @@ -97772,6 +99909,273 @@ Dependencies of the node Number of dependencies + + + Creates an event record node and adds it to a graph + Creates a new event record node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p params. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + Each launch of the graph will record \p event to capture execution of the + node's dependencies. + + Returns newly created node + Graph to which to add the node + Dependencies of the node + Number of dependencies + Event for the node + + + + + Returns the event associated with an event record node + + Node to get the event for + Pointer to return the event + + + + + Sets an event record node's event + + Node to set the event for + Event to use + + + + + Creates an event wait node and adds it to a graph + Creates a new event wait node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p params. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. + A handle to the new node will be returned in \p phGraphNode. + The graph node will wait for all work captured in \p event. See ::cuEventRecord() + for details on what is captured by an event. \p event may be from a different context + or device than the launch stream. 
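Aside on the event record/wait nodes described above: a sketch of expressing an event-based ordering edge inside a graph. `graph`, `producer`, and `event` are assumed to exist already; error checking is omitted.

#include <cuda.h>

/* Sketch: record an event after `producer` completes and wait on it elsewhere in the graph. */
static void add_event_dependency(CUgraph graph, CUgraphNode producer, CUevent event)
{
    CUgraphNode recordNode, waitNode;

    /* Record node depends on the producer node. */
    cuGraphAddEventRecordNode(&recordNode, graph, &producer, 1, event);

    /* Wait node is a root node here; it waits for all work captured by `event`. */
    cuGraphAddEventWaitNode(&waitNode, graph, NULL, 0, event);
}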
+ + Returns newly created node + Graph to which to add the node + Dependencies of the node + Number of dependencies + Event for the node + + + + + Returns the event associated with an event wait node + + Node to get the event for + Pointer to return the event + + + + + Sets an event wait node's event + + Node to set the event for + Event to use + + + + + Creates an external semaphore signal node and adds it to a graph + Creates a new external semaphore signal node and adds it to \p hGraph with \p + numDependencies dependencies specified via \p dependencies and arguments specified + in \p nodeParams.It is possible for \p numDependencies to be 0, in which case the + node will be placed at the root of the graph. \p dependencies may not have any + duplicate entries. A handle to the new node will be returned in \p phGraphNode. + + Returns newly created node + Graph to which to add the node + Dependencies of the node + Number of dependencies + Parameters for the node + + + + + Returns an external semaphore signal node's parameters + Returns the parameters of an external semaphore signal node \p hNode in \p params_out. + The \p extSemArray and \p paramsArray returned in \p params_out, + are owned by the node.This memory remains valid until the node is destroyed or its + parameters are modified, and should not be modified + directly.Use ::cuGraphExternalSemaphoresSignalNodeSetParams to update the parameters of this node. + + Node to get the parameters for + Pointer to return the parameters + + + + + Sets an external semaphore signal node's parameters + Sets the parameters of an external semaphore signal node \p hNode to \p nodeParams. + + Node to set the parameters for + Parameters to copy + + + + + Creates an external semaphore wait node and adds it to a graph + Creates a new external semaphore wait node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. A handle + to the new node will be returned in \p phGraphNode. + + Returns newly created node + Graph to which to add the node + Dependencies of the node + Number of dependencies + Parameters for the node + + + + + Returns an external semaphore wait node's parameters + Returns the parameters of an external semaphore wait node \p hNode in \p params_out. + The \p extSemArray and \p paramsArray returned in \p params_out, + are owned by the node.This memory remains valid until the node is destroyed or its + parameters are modified, and should not be modified + directly.Use ::cuGraphExternalSemaphoresSignalNodeSetParams to update the + parameters of this node. + + Node to get the parameters for + Pointer to return the parameters + + + + + Sets an external semaphore wait node's parameters + Sets the parameters of an external semaphore wait node \p hNode to \p nodeParams. + + Node to set the parameters for + Parameters to copy + + + + + + + + Creates an allocation node and adds it to a graph + Creates a new allocation node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. A handle + to the new node will be returned in \p phGraphNode. 
+ When::cuGraphAddMemAllocNode creates an allocation node, it returns the address of the allocation in + \param nodeParams.dptr.The allocation's address remains fixed across instantiations and launches. + If the allocation is freed in the same graph, by creating a free node using ::cuGraphAddMemFreeNode, + the allocation can be accessed by nodes ordered after the allocation node but before the free node. + These allocations cannot be freed outside the owning graph, and they can only be freed once in the + owning graph. + If the allocation is not freed in the same graph, then it can be accessed not only by nodes in the + graph which are ordered after the allocation node, but also by stream operations ordered after the + graph's execution but before the allocation is freed. + Allocations which are not freed in the same graph can be freed by: + - passing the allocation to ::cuMemFreeAsync or ::cuMemFree; + - launching a graph with a free node for that allocation; or + - specifying::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH during instantiation, which makes + each launch behave as though it called::cuMemFreeAsync for every unfreed allocation. + It is not possible to free an allocation in both the owning graph and another graph.If the allocation + is freed in the same graph, a free node cannot be added to another graph.If the allocation is freed + in another graph, a free node can no longer be added to the owning graph. + The following restrictions apply to graphs which contain allocation and/or memory free nodes: + - Nodes and edges of the graph cannot be deleted. + - The graph cannot be used in a child node. + - Only one instantiation of the graph may exist at any point in time. + - The graph cannot be cloned. + + Returns newly created node + Graph to which to add the node + Dependencies of the node + Number of dependencies + Parameters for the node + + + + + Returns a memory alloc node's parameters + Returns the parameters of a memory alloc node \p hNode in \p params_out. + The \p poolProps and \p accessDescs returned in \p params_out, are owned by the + node.This memory remains valid until the node is destroyed.The returned + parameters must not be modified. + + Node to get the parameters for + Pointer to return the parameters + + + + + Creates a memory free node and adds it to a graph + Creates a new memory free node and adds it to \p hGraph with \p numDependencies + dependencies specified via \p dependencies and arguments specified in \p nodeParams. + It is possible for \p numDependencies to be 0, in which case the node will be placed + at the root of the graph. \p dependencies may not have any duplicate entries. A handle + to the new node will be returned in \p phGraphNode. + ::cuGraphAddMemFreeNode will return ::CUDA_ERROR_INVALID_VALUE if the user attempts to free: + - an allocation twice in the same graph. + - an address that was not returned by an allocation node. + - an invalid address. + The following restrictions apply to graphs which contain allocation and/or memory free nodes: + - Nodes and edges of the graph cannot be deleted. + - The graph cannot be used in a child node. + - Only one instantiation of the graph may exist at any point in time. + - The graph cannot be cloned. + + Returns newly created node + Graph to which to add the node + Dependencies of the node + Number of dependencies + Address of memory to free + + + + + Returns a memory free node's parameters + Returns the address of a memory free node \p hNode in \p dptr_out. 
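Aside on graph-owned allocations: a sketch of pairing an allocation node with a free node in the same graph. This assumes the CUDA 11.4 driver API; the CUDA_MEM_ALLOC_NODE_PARAMS field names shown follow the 11.4 header as I understand it, and device 0 plus a 1 MiB size are illustration values.

#include <cuda.h>
#include <string.h>

/* Sketch: allocate from a graph node and free the allocation later in the same graph. */
static void add_alloc_free_pair(CUgraph graph)
{
    CUDA_MEM_ALLOC_NODE_PARAMS allocParams;
    memset(&allocParams, 0, sizeof(allocParams));
    allocParams.poolProps.allocType     = CU_MEM_ALLOCATION_TYPE_PINNED;
    allocParams.poolProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
    allocParams.poolProps.location.id   = 0;          /* assumed device */
    allocParams.bytesize                = 1 << 20;    /* 1 MiB */

    CUgraphNode allocNode;
    cuGraphAddMemAllocNode(&allocNode, graph, NULL, 0, &allocParams);
    /* The address is returned in allocParams.dptr and stays fixed across launches. */
    CUdeviceptr dptr = allocParams.dptr;

    CUgraphNode freeNode;
    cuGraphAddMemFreeNode(&freeNode, graph, &allocNode, 1, dptr);
}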
+ + Node to get the parameters for + Pointer to return the device address + + + + + Free unused memory that was cached on the specified device for use with graphs back to the OS. + Blocks which are not in use by a graph that is either currently executing or scheduled to execute are freed back to the operating system. + + The device for which cached memory should be freed. + + + + + Query asynchronous allocation attributes related to graphs + Valid attributes are: + - ::CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT: Amount of memory, in bytes, currently associated with graphs + - ::CU_GRAPH_MEM_ATTR_USED_MEM_HIGH: High watermark of memory, in bytes, associated with graphs since the last time it was reset.High watermark can only be reset to zero. + - ::CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT: Amount of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator. + - ::CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH: High watermark of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator. + + Specifies the scope of the query + attribute to get + retrieved value + + + + + Set asynchronous allocation attributes related to graphs + Valid attributes are: + - ::CU_GRAPH_MEM_ATTR_USED_MEM_HIGH: High watermark of memory, in bytes, associated with graphs since the last time it was reset.High watermark can only be reset to zero. + - ::CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH: High watermark of memory, in bytes, currently allocated for use by the CUDA graphs asynchronous allocator. + + Specifies the scope of the query + attribute to get + pointer to value to set + + Clones a graph @@ -97896,7 +100300,8 @@ Elements in \p from and \p to at corresponding indices define a dependency. Each node in \p from and \p to must belong to \p hGraph. If \p numDependencies is 0, elements in \p from and \p to will be ignored. - Specifying a non-existing dependency will return an error. + Specifying a non-existing dependency will return an error. + Dependencies cannot be removed from graphs which contain allocation or free nodes. Any attempt to do so will return an error. Graph from which to remove dependencies Array of nodes that provide the dependencies @@ -97906,7 +100311,8 @@ Remove a node from the graph - Removes \p hNode from its graph. This operation also severs any dependencies of other nodes on \p hNode and vice versa. + Removes \p hNode from its graph. This operation also severs any dependencies of other nodes on \p hNode and vice versa. + Nodes which belong to a graph which contains allocation or free nodes cannot be destroyed. Any attempt to do so will return an error. Node to remove @@ -97929,6 +100335,194 @@ A character buffer to store diagnostic messages Size of the log buffer in bytes + + + Creates an executable graph from a graph + Instantiates \p hGraph as an executable graph. The graph is validated for any + structural constraints or intra-node constraints which were not previously + validated.If instantiation is successful, a handle to the instantiated graph + is returned in \p phGraphExec. + The \p flags parameter controls the behavior of instantiation and subsequent graph launches.Valid flags are: + - ::CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH, which configures a graph containing memory allocation nodes to automatically free any + unfreed memory allocations before the graph is relaunched. + If \p hGraph contains any allocation or free nodes, there can be at most one + executable graph in existence for that graph at a time. 
+ An attempt to instantiate a second executable graph before destroying the first + with ::cuGraphExecDestroy will result in an error. + + Returns instantiated graph + Graph to instantiate + Flags to control instantiation. See ::CUgraphInstantiate_flags. + + + + + Sets the parameters for a kernel node in the given graphExec + Sets the parameters of a kernel node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + \p hNode must not have been removed from the original graph.The \p func field + of \p nodeParams cannot be modified and must match the original value. + All other values can be modified. + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + The executable graph in which to set the specified node + kernel node from the graph from which graphExec was instantiated + Updated Parameters to set + + + + Sets the parameters for a memcpy node in the given graphExec. + Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had + contained \p copyParams at instantiation. hNode must remain in the graph which was + used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored. + The source and destination memory in \p copyParams must be allocated from the same + contexts as the original source and destination memory. Both the instantiation-time + memory operands and the memory operands in \p copyParams must be 1-dimensional. + Zero-length operations are not supported. + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. hNode is also + not modified by this call. + Returns CUDA_ERROR_INVALID_VALUE if the memory operands' mappings changed or + either the original or new memory operands are multidimensional. + + The executable graph in which to set the specified node + Memcpy node from the graph which was used to instantiate graphExec + The updated parameters to set + Context on which to run the node + + + + Sets the parameters for a memset node in the given graphExec. + Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had + contained \p memsetParams at instantiation. hNode must remain in the graph which was + used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored. + The destination memory in \p memsetParams must be allocated from the same + contexts as the original destination memory. Both the instantiation-time + memory operand and the memory operand in \p memsetParams must be 1-dimensional. + Zero-length operations are not supported. + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. hNode is also + not modified by this call. + Returns CUDA_ERROR_INVALID_VALUE if the memory operand's mappings changed or + either the original or new memory operand are multidimensional. + + The executable graph in which to set the specified node + Memset node from the graph which was used to instantiate graphExec + The updated parameters to set + Context on which to run the node + + + + Sets the parameters for a host node in the given graphExec. 
+ Updates the work represented by \p hNode in \p hGraphExec as though \p hNode had + contained \p nodeParams at instantiation. hNode must remain in the graph which was + used to instantiate \p hGraphExec. Changed edges to and from hNode are ignored. + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. hNode is also + not modified by this call. + + The executable graph in which to set the specified node + Host node from the graph which was used to instantiate graphExec + The updated parameters to set + + + + Updates node parameters in the child graph node in the given graphExec. + Updates the work represented by \p hNode in \p hGraphExec as though the nodes contained + in \p hNode's graph had the parameters contained in \p childGraph's nodes at instantiation. + \p hNode must remain in the graph which was used to instantiate \p hGraphExec. + Changed edges to and from \p hNode are ignored. + The modifications only affect future launches of \p hGraphExec. Already enqueued + or running launches of \p hGraphExec are not affected by this call. \p hNode is also + not modified by this call. + The topology of \p childGraph, as well as the node insertion order, must match that + of the graph contained in \p hNode. See::cuGraphExecUpdate() for a list of restrictions + on what can be updated in an instantiated graph.The update is recursive, so child graph + nodes contained within the top level child graph will also be updated. + + The executable graph in which to set the specified node + Host node from the graph which was used to instantiate graphExec + The graph supplying the updated parameters + + + + + Sets the event for an event record node in the given graphExec + Sets the event of an event record node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + The executable graph in which to set the specified node + event record node from the graph from which graphExec was instantiated + Updated event to use + + + + + Sets the event for an event record node in the given graphExec + Sets the event of an event record node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + \p hNode is also not modified by this call. + + The executable graph in which to set the specified node + event wait node from the graph from which graphExec was instantiated + Updated event to use + + + + + Sets the parameters for an external semaphore signal node in the given graphExec + Sets the parameters of an external semaphore signal node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + hNode must not have been removed from the original graph. + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. 
+ hNode is also not modified by this call. + Changing \p nodeParams->numExtSems is not supported. + + The executable graph in which to set the specified node + semaphore signal node from the graph from which graphExec was instantiated + Updated Parameters to set + + + + + Sets the parameters for an external semaphore wait node in the given graphExec + Sets the parameters of an external semaphore wait node in an executable graph \p hGraphExec. + The node is identified by the corresponding node \p hNode in the + non-executable graph, from which the executable graph was instantiated. + hNode must not have been removed from the original graph. + The modifications only affect future launches of \p hGraphExec. Already + enqueued or running launches of \p hGraphExec are not affected by this call. + hNode is also not modified by this call. + Changing \p nodeParams->numExtSems is not supported. + + The executable graph in which to set the specified node + semaphore wait node from the graph from which graphExec was instantiated + Updated Parameters to set + + + + + Uploads an executable graph in a stream + Uploads \p hGraphExec to the device in \p hStream without executing it.Uploads of + the same \p hGraphExec will be serialized.Each upload is ordered behind both any + previous work in \p hStream and any previous launches of \p hGraphExec. + + Executable graph to upload + Stream in which to upload the graph + + Launches an executable graph in a stream @@ -97957,6 +100551,184 @@ Graph to destroy + + + Check whether an executable graph can be updated with a graph and perform the update if possible + Updates the node parameters in the instantiated graph specified by \p hGraphExec with the + node parameters in a topologically identical graph specified by \p hGraph. + Limitations: + - Kernel nodes: + - The function must not change (same restriction as cuGraphExecKernelNodeSetParams()) + - Memset and memcpy nodes: + - The CUDA device(s) to which the operand(s) was allocated/mapped cannot change. + - The source/destination memory must be allocated from the same contexts as the original + source/destination memory. + - Only 1D memsets can be changed. + - Additional memcpy node restrictions: + - Changing either the source or destination memory type(i.e. CU_MEMORYTYPE_DEVICE, + CU_MEMORYTYPE_ARRAY, etc.) is not supported. + Note: The API may add further restrictions in future releases. The return code should always be checked. + Some node types are not currently supported: + - Empty graph nodes(CU_GRAPH_NODE_TYPE_EMPTY) + - Child graphs(CU_GRAPH_NODE_TYPE_GRAPH). + cuGraphExecUpdate sets \p updateResult_out to CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED under + the following conditions: + - The count of nodes directly in \p hGraphExec and \p hGraph differ, in which case \p hErrorNode_out + is NULL. + - A node is deleted in \p hGraph but not not its pair from \p hGraphExec, in which case \p hErrorNode_out + is NULL. + - A node is deleted in \p hGraphExec but not its pair from \p hGraph, in which case \p hErrorNode_out is + the pairless node from \p hGraph. + - The dependent nodes of a pair differ, in which case \p hErrorNode_out is the node from \p hGraph. + cuGraphExecUpdate sets \p updateResult_out to: + - CU_GRAPH_EXEC_UPDATE_ERROR if passed an invalid value. + - CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED if the graph topology changed + - CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED if the type of a node changed, in which case + \p hErrorNode_out is set to the node from \p hGraph. 
+ - CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED if the func field of a kernel changed, in which + case \p hErrorNode_out is set to the node from \p hGraph + - CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED if any parameters to a node changed in a way + that is not supported, in which case \p hErrorNode_out is set to the node from \p hGraph. + - CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED if something about a node is unsupported, like + the node’s type or configuration, in which case \p hErrorNode_out is set to the node from \p hGraph + If \p updateResult_out isn’t set in one of the situations described above, the update check passes + and cuGraphExecUpdate updates \p hGraphExec to match the contents of \p hGraph. If an error happens + during the update, \p updateResult_out will be set to CU_GRAPH_EXEC_UPDATE_ERROR; otherwise, + \p updateResult_out is set to CU_GRAPH_EXEC_UPDATE_SUCCESS. + cuGraphExecUpdate returns CUDA_SUCCESS when the updated was performed successfully. It returns + CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE if the graph update was not performed because it included + changes which violated constraints specific to instantiated graph update. + + The instantiated graph to be updated + The graph containing the updated parameters + The node which caused the permissibility check to forbid the update, if any + Whether the graph update was permitted. If was forbidden, the reason why + + + + Copies attributes from source node to destination node. + Copies attributes from source node \p src to destination node \p dst. Both node must have the same context. + + Destination node + Source node + + + + Queries node attribute. + Queries attribute \p attr from node \p hNode and stores it in corresponding member of \p value_out. + + + + + + + + + Sets node attribute. + Sets attribute \p attr on node \p hNode from corresponding attribute of value. + + + + + + + + + Write a DOT file describing graph structure + Using the provided \p hGraph, write to \p path a DOT formatted description of the graph. + By default this includes the graph topology, node types, node id, kernel names and memcpy direction. + \p flags can be specified to write more detailed information about each node type such as + parameter values, kernel attributes, node and function handles. + + The graph to create a DOT file from + The path to write the DOT file to + Flags from CUgraphDebugDot_flags for specifying which additional node information to write + + + + + Create a user object + Create a user object with the specified destructor callback and initial reference count. The initial references are owned by the caller. + Destructor callbacks cannot make CUDA API calls and should avoid blocking behavior, as they + are executed by a shared internal thread.Another thread may be signaled to perform such + actions, if it does not block forward progress of tasks scheduled through CUDA. + See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects. + + Location to return the user object handle + The pointer to pass to the destroy function + Callback to free the user object when it is no longer in use + The initial refcount to create the object with, typically 1. The initial references are owned by the calling thread. + Currently it is required to pass ::CU_USER_OBJECT_NO_DESTRUCTOR_SYNC, which is the only defined flag. This indicates that the destroy + callback cannot be waited on by any CUDA API.Users requiring synchronization of the callback should signal its completion manually. 
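The cuGraphExecUpdate entry above is the driver-level hook behind the usual "update the instantiated graph if permitted, otherwise re-instantiate" pattern. A minimal host-side C sketch of that pattern against the raw driver API follows; `buildGraph` is a hypothetical helper that records one iteration's work into a `CUgraph`, and the `cuGraphInstantiate`/`cuGraphExecUpdate` signatures shown are the CUDA 11.x forms (both changed in CUDA 12), so treat this as an illustration rather than a drop-in implementation.

```c
#include <cuda.h>

/* Hypothetical helper: records this iteration's work into a new CUgraph. */
extern CUgraph buildGraph(void);

/* Reuse one executable graph across iterations, updating it in place when
 * cuGraphExecUpdate permits and re-instantiating only when it does not. */
static void launchIteration(CUgraphExec *exec, CUstream stream)
{
    CUgraph graph = buildGraph();

    if (*exec != NULL) {
        CUgraphNode errorNode = NULL;
        CUgraphExecUpdateResult updateResult;
        /* CUDA 11.x signature; CUDA 12 replaced the last two parameters
         * with a single CUgraphExecUpdateResultInfo*. */
        CUresult rc = cuGraphExecUpdate(*exec, graph, &errorNode, &updateResult);
        if (rc != CUDA_SUCCESS) {
            /* Topology or unsupported parameter change: drop the stale
             * exec and fall back to a fresh instantiation. */
            cuGraphExecDestroy(*exec);
            *exec = NULL;
        }
    }

    if (*exec == NULL) {
        CUgraphNode errNode = NULL;
        char log[256] = {0};
        /* CUDA 11.x signature; CUDA 12 takes a flags argument instead of
         * the error-node and log-buffer parameters. */
        cuGraphInstantiate(exec, graph, &errNode, log, sizeof(log));
    }

    cuGraphLaunch(*exec, stream);
    cuGraphDestroy(graph); /* the executable graph keeps its own copy */
}
```

Per the documentation above, the update path only succeeds when the topology is unchanged and only the supported parameter kinds differ (for example, kernel arguments but not the kernel function itself), which is why the re-instantiation fallback is kept.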
+ + + + + Retain a reference to a user object + Retains new references to a user object. The new references are owned by the caller. + See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects. + + The object to retain + The number of references to retain, typically 1. Must be nonzero and not larger than INT_MAX. + + + + + Release a reference to a user object + Releases user object references owned by the caller. The object's destructor is invoked if the reference count reaches zero. + It is undefined behavior to release references not owned by the caller, or to use a user object handle after all references are released. + See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects. + + The object to release + The number of references to release, typically 1. Must be nonzero and not larger than INT_MAX. + + + + + Retain a reference to a user object from a graph + Creates or moves user object references that will be owned by a CUDA graph. + See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects. + + The graph to associate the reference with + The user object to retain a reference for + The number of references to add to the graph, typically 1. Must be nonzero and not larger than INT_MAX. + The optional flag ::CU_GRAPH_USER_OBJECT_MOVE transfers references from the calling thread, rather than create new references.Pass None to create new references. + + + + + Release a user object reference from a graph + Releases user object references owned by a graph. + See CUDA User Objects in the CUDA C++ Programming Guide for more information on user objects. + + The graph that will release the reference + The user object to release a reference for + The number of references to release, typically 1. Must be nonzero and not larger than INT_MAX. + + + + + + + + + + Blocks until remote writes are visible to the specified scope + Blocks until GPUDirect RDMA writes to the target context via mappings + created through APIs like nvidia_p2p_get_pages(see + https://docs.nvidia.com/cuda/gpudirect-rdma for more information), are + visible to the specified scope. + + If the scope equals or lies within the scope indicated by + ::CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING, the call + will be a no-op and can be safely omitted for performance.This can be + determined by comparing the numerical values between the two enums, with + smaller scopes having smaller values. + Users may query support for this API via ::CU_DEVICE_ATTRIBUTE_FLUSH_FLUSH_GPU_DIRECT_RDMA_OPTIONS. + + The target of the operation, see ::CUflushGPUDirectRDMAWritesTarget + The scope of the operation, see ::CUflushGPUDirectRDMAWritesScope + A CUDA function or CUDA kernel @@ -99374,6 +102146,14 @@ block size permitted by the device / function instead. Flags + + + Returns dynamic shared memory available per block when launching \p numBlocks blocks on SM + Returns in \p *dynamicSmemSize the maximum size of dynamic shared memory to allow \p numBlocks blocks per SM. + + Number of blocks to fit on SM + Size of the blocks + Sets the grid dimensions according to block dimensions, so that each dimension has at least computeSize threads diff --git a/src/external/ManagedCuda/NVRTC.XML b/src/external/ManagedCuda/NVRTC.XML index d3b2df27..15d3bfd1 100644 --- a/src/external/ManagedCuda/NVRTC.XML +++ b/src/external/ManagedCuda/NVRTC.XML @@ -52,6 +52,12 @@ + + + + + + @@ -200,6 +206,48 @@ Compiled result. 
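The user-object entries above (cuUserObjectCreate, cuUserObjectRetain/Release, cuGraphRetainUserObject) are the mechanism for tying a host-side resource's lifetime to a graph and to the executable graphs instantiated from it. A rough C sketch, assuming CUDA 11.3+ and an illustrative `IterationState` struct and `freeState` callback that are not part of any API:

```c
#include <cuda.h>
#include <stdlib.h>

typedef struct { float *hostScratch; } IterationState; /* illustrative only */

/* Destructor callback: per the cuUserObjectCreate notes above it must not
 * call into the CUDA API and should avoid blocking. */
static void CUDA_CB freeState(void *userData)
{
    IterationState *s = (IterationState *)userData;
    free(s->hostScratch);
    free(s);
}

/* Hand ownership of `state` to `graph`; the destructor runs once every
 * reference (including those held by graphs instantiated from `graph`)
 * has been released. */
static CUresult attachStateToGraph(CUgraph graph, IterationState *state)
{
    CUuserObject obj;
    CUresult rc = cuUserObjectCreate(&obj, state, freeState,
                                     1, /* initial refcount, owned by us */
                                     CU_USER_OBJECT_NO_DESTRUCTOR_SYNC);
    if (rc != CUDA_SUCCESS)
        return rc;

    /* CU_GRAPH_USER_OBJECT_MOVE transfers the calling thread's reference
     * to the graph instead of creating a new one, so no matching
     * cuUserObjectRelease is needed here. */
    return cuGraphRetainUserObject(graph, obj, 1, CU_GRAPH_USER_OBJECT_MOVE);
}
```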
+ + + nvrtcGetCUBINSize sets \p cubinSizeRet with the size of the cubin + generated by the previous compilation of \p prog.The value of + cubinSizeRet is set to 0 if the value specified to \c -arch is a + virtual architecture instead of an actual architecture. + + CUDA Runtime Compilation program. + Size of the generated cubin. + + + + nvrtcGetCUBIN stores the cubin generated by the previous compilation + of \p prog in the memory pointed by \p cubin.No cubin is available + if the value specified to \c -arch is a virtual architecture instead + of an actual architecture. + + prog CUDA Runtime Compilation program. + cubin Compiled and assembled result. + + + + nvrtcGetNVVMSize sets \p nvvmSizeRet with the size of the NVVM + generated by the previous compilation of \p prog.The value of + nvvmSizeRet is set to 0 if the program was not compiled with + -dlto. + + CUDA Runtime Compilation program. + Size of the generated NVVM. + + + + + nvrtcGetNVVM stores the NVVM generated by the previous compilation + of \p prog in the memory pointed by \p nvvm. + The program must have been compiled with -dlto, + otherwise will return an error. + + prog CUDA Runtime Compilation program. + nvvm Compiled result. + + sets logSizeRet with the size of the log generated by the previous compilation of prog (including the trailing NULL). diff --git a/src/external/ManagedCuda/NVRTC.dll b/src/external/ManagedCuda/NVRTC.dll index 3961efb8..bdbd8fa8 100644 Binary files a/src/external/ManagedCuda/NVRTC.dll and b/src/external/ManagedCuda/NVRTC.dll differ
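The new nvrtcGetCUBINSize/nvrtcGetCUBIN (and nvrtcGetNVVMSize/nvrtcGetNVVM) entries follow the same size-then-copy convention as the existing PTX and log getters. A small self-contained C sketch of retrieving a cubin after compilation, falling back to PTX when only a virtual architecture was requested; the kernel source and option strings are placeholders:

```c
#include <nvrtc.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    const char *src =
        "extern \"C\" __global__ void k(float *p) { p[0] = 1.0f; }";
    /* A real architecture (sm_XX) yields a cubin; a virtual one
     * (compute_XX) makes nvrtcGetCUBINSize report 0. */
    const char *opts[] = { "--gpu-architecture=sm_70" };

    nvrtcProgram prog;
    nvrtcCreateProgram(&prog, src, "k.cu", 0, NULL, NULL);

    if (nvrtcCompileProgram(prog, 1, opts) != NVRTC_SUCCESS) {
        size_t logSize;
        nvrtcGetProgramLogSize(prog, &logSize);
        char *log = (char *)malloc(logSize);
        nvrtcGetProgramLog(prog, log);
        fprintf(stderr, "%s\n", log);
        free(log);
        nvrtcDestroyProgram(&prog);
        return 1;
    }

    /* Size-then-copy: a size of 0 means -arch named a virtual
     * architecture, so only PTX is available. */
    size_t cubinSize = 0;
    nvrtcGetCUBINSize(prog, &cubinSize);
    if (cubinSize > 0) {
        char *cubin = (char *)malloc(cubinSize);
        nvrtcGetCUBIN(prog, cubin);
        /* pass `cubin` to cuModuleLoadData, cache it on disk, etc. */
        free(cubin);
    } else {
        size_t ptxSize;
        nvrtcGetPTXSize(prog, &ptxSize);
        char *ptx = (char *)malloc(ptxSize);
        nvrtcGetPTX(prog, ptx);
        free(ptx);
    }

    nvrtcDestroyProgram(&prog);
    return 0;
}
```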