Skip to content

Commit 28538c3

Browse files
SundarRajan28, Copilot, and kiritigowda
authored
VX_RPP - Log1p augmentation support (#1503)
* Add log1p support * Resolving review comments * Rename log1p extension * Fix hipHostMalloc bug Co-authored-by: Copilot <[email protected]> * Minor changes * Minor changes * Bug fixes --------- Co-authored-by: Copilot <[email protected]> Co-authored-by: Kiriti Gowda <[email protected]>
1 parent 19f39cc commit 28538c3

File tree

9 files changed

+296
-4
lines changed

9 files changed

+296
-4
lines changed

amd_openvx_extensions/amd_rpp/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ list(APPEND SOURCES
176176
source/tensor/WarpAffine.cpp
177177
source/tensor/Slice.cpp
178178
source/tensor/Transpose.cpp
179+
source/tensor/Log1p.cpp
179180
source/kernel_rpp.cpp
180181
source/internal_publishKernels.cpp
181182
)

amd_openvx_extensions/amd_rpp/include/internal_publishKernels.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ vx_status Slice_Register(vx_context);
167167
vx_status Normalize_Register(vx_context);
168168
vx_status MelFilterBank_Register(vx_context);
169169
vx_status Transpose_Register(vx_context);
170+
vx_status Log1p_Register(vx_context);
170171

171172
// kernel names
172173
#define VX_KERNEL_RPP_NOPBATCHPD_NAME "org.rpp.NopbatchPD"
@@ -303,5 +304,6 @@ vx_status Transpose_Register(vx_context);
303304
#define VX_KERNEL_RPP_NORMALIZE_NAME "org.rpp.Normalize"
304305
#define VX_KERNEL_RPP_MELFILTERBANK_NAME "org.rpp.MelFilterBank"
305306
#define VX_KERNEL_RPP_TRANSPOSE_NAME "org.rpp.Transpose"
307+
#define VX_KERNEL_RPP_LOG1P_NAME "org.rpp.Log1p"
306308

307309
#endif //_AMDVX_EXT__PUBLISH_KERNELS_H_

amd_openvx_extensions/amd_rpp/include/internal_rpp.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,9 @@ enum vxTensorLayout {
7272
VX_NFCHW = 3,
7373
VX_NHW = 4, // Audio/2D layout
7474
VX_NFT = 5, // Frequency major, Used for Spectrogram/MelFilterBank
75-
VX_NTF = 6 // Time major, Used for Spectrogram/MelFilterBank
75+
VX_NTF = 6, // Time major, Used for Spectrogram/MelFilterBank
76+
VX_NDHWC = 7,
77+
VX_NCDHW = 8
7678
};
7779

7880
const std::map<vxTensorLayout, RpptLayout> tensorLayoutMapping = {
@@ -83,8 +85,10 @@ const std::map<vxTensorLayout, RpptLayout> tensorLayoutMapping = {
8385
#if RPP_AUDIO
8486
{vxTensorLayout::VX_NHW, RpptLayout::NHW},
8587
{vxTensorLayout::VX_NFT, RpptLayout::NFT},
86-
{vxTensorLayout::VX_NTF, RpptLayout::NTF}
88+
{vxTensorLayout::VX_NTF, RpptLayout::NTF},
8789
#endif
90+
{vxTensorLayout::VX_NDHWC, RpptLayout::NDHWC},
91+
{vxTensorLayout::VX_NCDHW, RpptLayout::NCDHW}
8892
};
8993

9094
//! Brief The utility functions

amd_openvx_extensions/amd_rpp/include/kernels_rpp.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,8 @@ extern "C"
159159
VX_KERNEL_RPP_SLICE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x7c,
160160
VX_KERNEL_RPP_NORMALIZE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x7d,
161161
VX_KERNEL_RPP_MELFILTERBANK = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x7e,
162-
VX_KERNEL_RPP_TRANSPOSE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x7f
162+
VX_KERNEL_RPP_TRANSPOSE = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x7f,
163+
VX_KERNEL_RPP_LOG1P = VX_KERNEL_BASE(VX_ID_AMD, VX_LIBRARY_RPP) + 0x80
163164
};
164165

165166
#ifdef __cplusplus

amd_openvx_extensions/amd_rpp/include/vx_ext_rpp.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2042,6 +2042,17 @@ extern "C"
20422042
*/
20432043
SHARED_PUBLIC vx_node VX_API_CALL vxExtRppTranspose(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_array pPerm, vx_scalar inputLayout, vx_scalar outputLayout, vx_scalar roiType);
20442044

2045+
/*! \brief [Graph] Computes the natural logarithm of 1 + input element-wise and returns the output.
2046+
* \ingroup group_amd_rpp
2047+
* \param [in] graph The handle to the graph.
2048+
* \param [in] pSrc The input tensor in <tt>\ref VX_TYPE_INT16</tt> format data.
2049+
* \param [in] pSrcRoi The input tensor of batch size in <tt>unsigned int</tt> containing the roi values for the input.
2050+
* \param [out] pDst The output tensor in <tt>\ref VX_TYPE_FLOAT32</tt> format data.
2051+
* \param [in] inputLayout The input layout in <tt>\ref VX_TYPE_INT32</tt> denotes the layout of input tensor.
2052+
* \return A node reference <tt>\ref vx_node</tt>. Any possible errors preventing a successful creation should be checked using <tt>\ref vxGetStatus</tt>.
2053+
*/
2054+
SHARED_PUBLIC vx_node VX_API_CALL vxExtRppLog1p(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar inputLayout);
2055+
20452056
#ifdef __cplusplus
20462057
}
20472058
#endif

amd_openvx_extensions/amd_rpp/source/internal_publishKernels.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ vx_status get_kernels_to_publish()
172172
STATUS_ERROR_CHECK(ADD_KERNEL(Normalize_Register));
173173
STATUS_ERROR_CHECK(ADD_KERNEL(MelFilterBank_Register));
174174
STATUS_ERROR_CHECK(ADD_KERNEL(Transpose_Register));
175+
STATUS_ERROR_CHECK(ADD_KERNEL(Log1p_Register));
175176
return status;
176177
}
177178

amd_openvx_extensions/amd_rpp/source/kernel_rpp.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2795,6 +2795,23 @@ VX_API_ENTRY vx_node VX_API_CALL vxExtRppTranspose(vx_graph graph, vx_tensor pSr
27952795
return node;
27962796
}
27972797

2798+
/*! \brief Creates a Log1p node (log(1 + x) element-wise) in the given graph.
 * Packs the tensor/scalar parameters together with the graph's device affinity
 * into the kernel parameter list and instantiates VX_KERNEL_RPP_LOG1P.
 */
VX_API_ENTRY vx_node VX_API_CALL vxExtRppLog1p(vx_graph graph, vx_tensor pSrc, vx_tensor pSrcRoi, vx_tensor pDst, vx_scalar inputLayout) {
    vx_node node = NULL;
    vx_context context = vxGetContext((vx_reference)graph);
    if (vxGetStatus((vx_reference)context) == VX_SUCCESS) {
        // The device affinity of the graph is passed to the kernel as a trailing scalar.
        vx_uint32 affinityDevice = getGraphAffinity(graph);
        vx_scalar deviceType = vxCreateScalar(context, VX_TYPE_UINT32, &affinityDevice);
        vx_reference params[] = {
            (vx_reference)pSrc,
            (vx_reference)pSrcRoi,
            (vx_reference)pDst,
            (vx_reference)inputLayout,
            (vx_reference)deviceType};
        node = createNode(graph, VX_KERNEL_RPP_LOG1P, params, sizeof(params) / sizeof(params[0]));
    }
    return node;
}
2814+
27982815
RpptDataType getRpptDataType(vx_enum vxDataType) {
27992816
switch(vxDataType) {
28002817
case vx_type_e::VX_TYPE_FLOAT32:
@@ -2803,6 +2820,8 @@ RpptDataType getRpptDataType(vx_enum vxDataType) {
28032820
return RpptDataType::F16;
28042821
case vx_type_e::VX_TYPE_INT8:
28052822
return RpptDataType::I8;
2823+
case vx_type_e::VX_TYPE_INT16:
2824+
return RpptDataType::I16;
28062825
default:
28072826
return RpptDataType::U8;
28082827
}
@@ -2923,6 +2942,22 @@ void fillGenericDescriptionPtrfromDims(RpptGenericDescPtr &genericDescPtr, vxTen
29232942
genericDescPtr->strides[3] = 1;
29242943
break;
29252944
}
2945+
case vxTensorLayout::VX_NCDHW:
2946+
case vxTensorLayout::VX_NDHWC: {
2947+
genericDescPtr->numDims = 5;
2948+
genericDescPtr->dims[0] = tensorDims[0];
2949+
genericDescPtr->dims[1] = tensorDims[1];
2950+
genericDescPtr->dims[2] = tensorDims[2];
2951+
genericDescPtr->dims[3] = tensorDims[3];
2952+
genericDescPtr->dims[4] = tensorDims[4];
2953+
2954+
genericDescPtr->strides[0] = genericDescPtr->dims[1] * genericDescPtr->dims[2] * genericDescPtr->dims[3] * genericDescPtr->dims[4];
2955+
genericDescPtr->strides[1] = genericDescPtr->dims[2] * genericDescPtr->dims[3] * genericDescPtr->dims[4];
2956+
genericDescPtr->strides[2] = genericDescPtr->dims[3] * genericDescPtr->dims[4];
2957+
genericDescPtr->strides[3] = genericDescPtr->dims[4];
2958+
genericDescPtr->strides[4] = 1;
2959+
break;
2960+
}
29262961
case vxTensorLayout::VX_NHW:
29272962
case vxTensorLayout::VX_NFT:
29282963
case vxTensorLayout::VX_NTF: {
Lines changed: 237 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
/*
2+
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.
3+
4+
Permission is hereby granted, free of charge, to any person obtaining a copy
5+
of this software and associated documentation files (the "Software"), to deal
6+
in the Software without restriction, including without limitation the rights
7+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8+
copies of the Software, and to permit persons to whom the Software is
9+
furnished to do so, subject to the following conditions:
10+
11+
The above copyright notice and this permission notice shall be included in
12+
all copies or substantial portions of the Software.
13+
14+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20+
THE SOFTWARE.
21+
*/
22+
23+
#include "internal_publishKernels.h"
24+
25+
// Per-node state for the Log1p kernel, stored in VX_NODE_LOCAL_DATA_PTR.
struct Log1pLocalData {
    vxRppHandle *handle;                          // RPP backend handle, created in initialize and released in uninitialize
    Rpp32u deviceType;                            // AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU (from parameter #4)
    RppPtr_t pSrc;                                // source tensor buffer (host or HIP buffer depending on deviceType)
    RppPtr_t pDst;                                // destination tensor buffer
    RpptGenericDescPtr pSrcGenericDesc;           // RPP generic descriptor for the source tensor
    RpptGenericDescPtr pDstGenericDesc;           // RPP generic descriptor for the destination tensor
    Rpp32u *pSrcRoi;                              // ROI values read from the ROI tensor (parameter #1)
    vxTensorLayout inputLayout;                   // layout of the input tensor (from parameter #3)
    size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];  // dimensions queried from the input tensor
    size_t outputTensorDims[RPP_MAX_TENSOR_DIMS]; // dimensions queried from the output tensor
};
37+
38+
/*! \brief Refreshes the cached buffer pointers (source, ROI, destination) from the
 * node parameters before each execution, querying host or HIP buffers according to
 * the node's device affinity.
 * Fix: roi_tensor_ptr is now zero-initialized — previously, if deviceType matched
 * neither affinity branch, data->pSrcRoi was assigned from an indeterminate pointer.
 */
static vx_status VX_CALLBACK refreshLog1p(vx_node node, const vx_reference *parameters, vx_uint32 num, Log1pLocalData *data) {
    vx_status status = VX_SUCCESS;
    void *roi_tensor_ptr = nullptr;
    if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_OPENCL
        return VX_ERROR_NOT_IMPLEMENTED;  // Log1p has no OpenCL implementation
#elif ENABLE_HIP
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
#endif
    } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
    }
    data->pSrcRoi = static_cast<unsigned *>(roi_tensor_ptr);
    return status;
}
57+
58+
/*! \brief Validation callback for the Log1p kernel.
 * Checks the scalar parameter types (#3 layout must be VX_TYPE_INT32, #4 device
 * affinity must be VX_TYPE_UINT32), requires both tensors to have at least 3
 * dimensions, and propagates the output tensor attributes to the meta format.
 * Fix: the error messages previously said "(must be size)" for both scalar
 * checks — a copy-paste remnant; they now name the required type.
 */
static vx_status VX_CALLBACK validateLog1p(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
    vx_status status = VX_SUCCESS;
    vx_enum scalar_type;

    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
    if (scalar_type != VX_TYPE_INT32)
        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
    if (scalar_type != VX_TYPE_UINT32)
        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);

    // Check for input parameters
    size_t num_tensor_dims;
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
    if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Log1p: tensor: #0 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims);

    // Check for output parameters
    vx_uint8 tensor_fixed_point_position;
    size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
    vx_enum tensor_datatype;
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
    if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Log1p: tensor: #2 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims);

    // Mirror the output tensor's attributes onto its meta format so the framework
    // can verify/allocate the virtual output correctly.
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype)));
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
    return status;
}
90+
91+
/*! \brief Execution callback: dispatches rppt_log1p to the HIP or host backend.
 * Fix: the return status of refreshLog1p was previously ignored; it is now
 * checked so a failed buffer refresh (or the OpenCL not-implemented path)
 * aborts execution instead of running RPP on stale/invalid pointers.
 */
static vx_status VX_CALLBACK processLog1p(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    RppStatus rpp_status = RPP_SUCCESS;
    vx_status return_status = VX_SUCCESS;
    Log1pLocalData *data = NULL;
    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
    STATUS_ERROR_CHECK(refreshLog1p(node, parameters, num, data));
    if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_OPENCL
        return_status = VX_ERROR_NOT_IMPLEMENTED;  // no OpenCL backend for Log1p
#elif ENABLE_HIP
        rpp_status = rppt_log1p_gpu(data->pSrc, data->pSrcGenericDesc, data->pDst, data->pDstGenericDesc, data->pSrcRoi, data->handle->rppHandle);
        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
#endif
    }
    if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
        rpp_status = rppt_log1p_host(data->pSrc, data->pSrcGenericDesc, data->pDst, data->pDstGenericDesc, data->pSrcRoi, data->handle->rppHandle);
        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
    }
    return return_status;
}
111+
112+
// Node initializer: reads the scalar parameters, allocates the RPP generic tensor
// descriptors (ordinary heap memory for CPU affinity, pinned hipHostMalloc memory
// for HIP so the GPU backend can read them), fills them from the VX tensor
// attributes, creates the RPP handle, and stores the state in VX_NODE_LOCAL_DATA_PTR.
static vx_status VX_CALLBACK initializeLog1p(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    Log1pLocalData *data = new Log1pLocalData;
    if (data) {  // NOTE(review): plain new throws on failure, so this branch is effectively always taken
        memset(data, 0, sizeof(Log1pLocalData));

        vx_enum input_tensor_dtype, output_tensor_dtype;
        vx_int32 input_layout;
        // Parameter #3: input tensor layout; parameter #4: device affinity (CPU/GPU).
        STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
        STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
        data->inputLayout = static_cast<vxTensorLayout>(input_layout);

        if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
            data->pSrcGenericDesc = new RpptGenericDesc;
            data->pDstGenericDesc = new RpptGenericDesc;
        } else if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_HIP
            // Pinned allocations: descriptors must be host-accessible to the HIP backend.
            // NOTE(review): hipHostMalloc return codes are not checked here, and for GPU
            // affinity without ENABLE_HIP the descriptors stay null and the queries below
            // would dereference them — confirm those configurations are rejected upstream.
            hipHostMalloc(&data->pSrcGenericDesc, sizeof(RpptGenericDesc));
            hipHostMalloc(&data->pDstGenericDesc, sizeof(RpptGenericDesc));
#endif
        }
        // Querying for input tensor
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcGenericDesc->numDims, sizeof(data->pSrcGenericDesc->numDims)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcGenericDesc->numDims));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
        data->pSrcGenericDesc->dataType = getRpptDataType(input_tensor_dtype);
        data->pSrcGenericDesc->offsetInBytes = 0;
        fillGenericDescriptionPtrfromDims(data->pSrcGenericDesc, data->inputLayout, data->inputTensorDims);

        // Querying for output tensor
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstGenericDesc->numDims, sizeof(data->pDstGenericDesc->numDims)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstGenericDesc->numDims));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
        data->pDstGenericDesc->dataType = getRpptDataType(output_tensor_dtype);
        data->pDstGenericDesc->offsetInBytes = 0;
        // Both descriptors use the same layout scalar (parameter #3).
        fillGenericDescriptionPtrfromDims(data->pDstGenericDesc, data->inputLayout, data->outputTensorDims);

        // Prime the buffer pointers and create the per-node RPP handle sized by batch dim.
        refreshLog1p(node, parameters, num, data);
        STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->inputTensorDims[0], data->deviceType));
        STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
        return VX_SUCCESS;
    } else {
        return VX_FAILURE;
    }
}
156+
157+
// Node uninitializer: releases the RPP handle, then frees the generic tensor
// descriptors with the allocator matching the node's device affinity
// (hipHostFree for HIP pinned memory, delete for CPU heap memory).
static vx_status VX_CALLBACK uninitializeLog1p(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    Log1pLocalData *data;
    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
    STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
    if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_HIP
        // Descriptors were allocated with hipHostMalloc in initialize.
        auto freePinned = [](void *ptr) {
            if (!ptr) return;
            hipError_t err = hipHostFree(ptr);
            if (err != hipSuccess)
                std::cerr << "\n[ERR] hipHostFree failed " << std::to_string(err) << "\n";
        };
        freePinned(data->pSrcGenericDesc);
        freePinned(data->pDstGenericDesc);
#endif
    } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
        // Descriptors were allocated with new; delete is a no-op on nullptr.
        delete data->pSrcGenericDesc;
        delete data->pDstGenericDesc;
    }
    delete data;
    return VX_SUCCESS;
}
181+
182+
//! \brief The kernel target support callback.
183+
// TODO::currently the node is setting the same affinity as context. This needs to change when we have hybrid modes in the same graph
184+
static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
185+
vx_bool use_opencl_1_2, // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
186+
vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
187+
) {
188+
vx_context context = vxGetContext((vx_reference)graph);
189+
AgoTargetAffinityInfo affinity;
190+
vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
191+
if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
192+
supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
193+
else
194+
supported_target_affinity = AGO_TARGET_AFFINITY_CPU;
195+
196+
return VX_SUCCESS;
197+
}
198+
199+
// Registers the Log1p user kernel with the OpenVX context: adds the kernel with
// its process/validate/initialize/uninitialize callbacks, enables direct GPU
// buffer access when HIP is available, installs the target-support query, and
// declares the five kernel parameters before finalizing.
vx_status Log1p_Register(vx_context context) {
    vx_status status = VX_SUCCESS;
    // Add kernel to the context with callbacks
    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Log1p",
                                       VX_KERNEL_RPP_LOG1P,
                                       processLog1p,
                                       5,  // number of kernel parameters
                                       validateLog1p,
                                       initializeLog1p,
                                       uninitializeLog1p);
    ERROR_CHECK_OBJECT(kernel);
    AgoTargetAffinityInfo affinity;
    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
#if ENABLE_HIP
    // Let the runtime hand HIP device buffers straight to the kernel when running on GPU.
    vx_bool enableBufferAccess = vx_true_e;
    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
#else
    vx_bool enableBufferAccess = vx_false_e;
#endif
    amd_kernel_query_target_support_f query_target_support_f = query_target_support;

    if (kernel) {
        // Parameters: #0 src tensor, #1 ROI tensor, #2 dst tensor, #3 layout scalar, #4 device-type scalar.
        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
    }
    if (status != VX_SUCCESS) {
    // NOTE: the exit label is the target of `goto exit` hidden inside the
    // STATUS_ERROR_CHECK/PARAM_ERROR_CHECK macros above — on any failure the
    // kernel is removed and registration reports VX_FAILURE.
    exit:
        vxRemoveKernel(kernel);
        return VX_FAILURE;
    }

    return status;
}

0 commit comments

Comments
 (0)