/*
Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include "internal_publishKernels.h"
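//! \brief Local data maintained per node and shared across the Log1p kernel callbacks.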
struct Log1pLocalData {
    vxRppHandle *handle;
    Rpp32u deviceType;
    RppPtr_t pSrc;
    RppPtr_t pDst;
    RpptGenericDescPtr pSrcGenericDesc;
    RpptGenericDescPtr pDstGenericDesc;
    Rpp32u *pSrcRoi;
    vxTensorLayout inputLayout;
    size_t inputTensorDims[RPP_MAX_TENSOR_DIMS];
    size_t outputTensorDims[RPP_MAX_TENSOR_DIMS];
};

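//! \brief Refreshes the source, destination, and ROI buffer pointers from the node parameters before each execution.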
static vx_status VX_CALLBACK refreshLog1p(vx_node node, const vx_reference *parameters, vx_uint32 num, Log1pLocalData *data) {
    vx_status status = VX_SUCCESS;
    void *roi_tensor_ptr = nullptr;
    if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_OPENCL
        return VX_ERROR_NOT_IMPLEMENTED;
#elif ENABLE_HIP
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HIP, &data->pSrc, sizeof(data->pSrc)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HIP, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HIP, &data->pDst, sizeof(data->pDst)));
#endif
    } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_BUFFER_HOST, &data->pSrc, sizeof(data->pSrc)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[1], VX_TENSOR_BUFFER_HOST, &roi_tensor_ptr, sizeof(roi_tensor_ptr)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_BUFFER_HOST, &data->pDst, sizeof(data->pDst)));
    }
    data->pSrcRoi = static_cast<unsigned *>(roi_tensor_ptr);
    return status;
}

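//! \brief Validates the scalar and tensor parameters and propagates the output tensor metadata.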
static vx_status VX_CALLBACK validateLog1p(vx_node node, const vx_reference parameters[], vx_uint32 num, vx_meta_format metas[]) {
    vx_status status = VX_SUCCESS;
    vx_enum scalar_type;

    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[3], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
    if (scalar_type != VX_TYPE_INT32)
        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #3 type=%d (must be VX_TYPE_INT32)\n", scalar_type);
    STATUS_ERROR_CHECK(vxQueryScalar((vx_scalar)parameters[4], VX_SCALAR_TYPE, &scalar_type, sizeof(scalar_type)));
    if (scalar_type != VX_TYPE_UINT32)
        return ERRMSG(VX_ERROR_INVALID_TYPE, "validate: Parameter: #4 type=%d (must be VX_TYPE_UINT32)\n", scalar_type);

    // Check for input parameters
    size_t num_tensor_dims;
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
    if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Log1p: tensor: #0 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims);

    // Check for output parameters
    vx_uint8 tensor_fixed_point_position;
    size_t tensor_dims[RPP_MAX_TENSOR_DIMS];
    vx_enum tensor_datatype;
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
    if (num_tensor_dims < 3) return ERRMSG(VX_ERROR_INVALID_DIMENSION, "validate: Log1p: tensor: #2 dimensions=%lu (must be greater than or equal to 3)\n", num_tensor_dims);

    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype)));
    STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_NUMBER_OF_DIMS, &num_tensor_dims, sizeof(num_tensor_dims)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DIMS, &tensor_dims, sizeof(tensor_dims)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_DATA_TYPE, &tensor_datatype, sizeof(tensor_datatype)));
    STATUS_ERROR_CHECK(vxSetMetaFormatAttribute(metas[2], VX_TENSOR_FIXED_POINT_POSITION, &tensor_fixed_point_position, sizeof(tensor_fixed_point_position)));
    return status;
}

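//! \brief Executes the Log1p operation by dispatching to the RPP HIP or host backend.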
static vx_status VX_CALLBACK processLog1p(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    RppStatus rpp_status = RPP_SUCCESS;
    vx_status return_status = VX_SUCCESS;
    Log1pLocalData *data = NULL;
    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
    refreshLog1p(node, parameters, num, data);
    if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_OPENCL
        return_status = VX_ERROR_NOT_IMPLEMENTED;
#elif ENABLE_HIP
        rpp_status = rppt_log1p_gpu(data->pSrc, data->pSrcGenericDesc, data->pDst, data->pDstGenericDesc, data->pSrcRoi, data->handle->rppHandle);
        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
#endif
    }
    if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
        rpp_status = rppt_log1p_host(data->pSrc, data->pSrcGenericDesc, data->pDst, data->pDstGenericDesc, data->pSrcRoi, data->handle->rppHandle);
        return_status = (rpp_status == RPP_SUCCESS) ? VX_SUCCESS : VX_FAILURE;
    }
    return return_status;
}

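//! \brief Allocates the node-local data, builds the RPP generic tensor descriptors, and creates the RPP handle.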
static vx_status VX_CALLBACK initializeLog1p(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    Log1pLocalData *data = new Log1pLocalData;
    if (data) {
        memset(data, 0, sizeof(Log1pLocalData));

        vx_enum input_tensor_dtype, output_tensor_dtype;
        vx_int32 input_layout;
        STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[3], &input_layout, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
        STATUS_ERROR_CHECK(vxCopyScalar((vx_scalar)parameters[4], &data->deviceType, VX_READ_ONLY, VX_MEMORY_TYPE_HOST));
        data->inputLayout = static_cast<vxTensorLayout>(input_layout);

        if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
            data->pSrcGenericDesc = new RpptGenericDesc;
            data->pDstGenericDesc = new RpptGenericDesc;
        } else if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_HIP
            hipHostMalloc(&data->pSrcGenericDesc, sizeof(RpptGenericDesc));
            hipHostMalloc(&data->pDstGenericDesc, sizeof(RpptGenericDesc));
#endif
        }
        // Querying for input tensor
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_NUMBER_OF_DIMS, &data->pSrcGenericDesc->numDims, sizeof(data->pSrcGenericDesc->numDims)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DIMS, &data->inputTensorDims, sizeof(vx_size) * data->pSrcGenericDesc->numDims));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[0], VX_TENSOR_DATA_TYPE, &input_tensor_dtype, sizeof(input_tensor_dtype)));
        data->pSrcGenericDesc->dataType = getRpptDataType(input_tensor_dtype);
        data->pSrcGenericDesc->offsetInBytes = 0;
        fillGenericDescriptionPtrfromDims(data->pSrcGenericDesc, data->inputLayout, data->inputTensorDims);

        // Querying for output tensor
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_NUMBER_OF_DIMS, &data->pDstGenericDesc->numDims, sizeof(data->pDstGenericDesc->numDims)));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DIMS, &data->outputTensorDims, sizeof(vx_size) * data->pDstGenericDesc->numDims));
        STATUS_ERROR_CHECK(vxQueryTensor((vx_tensor)parameters[2], VX_TENSOR_DATA_TYPE, &output_tensor_dtype, sizeof(output_tensor_dtype)));
        data->pDstGenericDesc->dataType = getRpptDataType(output_tensor_dtype);
        data->pDstGenericDesc->offsetInBytes = 0;
        fillGenericDescriptionPtrfromDims(data->pDstGenericDesc, data->inputLayout, data->outputTensorDims);

        refreshLog1p(node, parameters, num, data);
        STATUS_ERROR_CHECK(createRPPHandle(node, &data->handle, data->inputTensorDims[0], data->deviceType));
        STATUS_ERROR_CHECK(vxSetNodeAttribute(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
        return VX_SUCCESS;
    } else {
        return VX_FAILURE;
    }
}

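//! \brief Releases the RPP handle and frees the tensor descriptors and node-local data.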
static vx_status VX_CALLBACK uninitializeLog1p(vx_node node, const vx_reference *parameters, vx_uint32 num) {
    Log1pLocalData *data;
    STATUS_ERROR_CHECK(vxQueryNode(node, VX_NODE_LOCAL_DATA_PTR, &data, sizeof(data)));
    STATUS_ERROR_CHECK(releaseRPPHandle(node, data->handle, data->deviceType));
    if (data->deviceType == AGO_TARGET_AFFINITY_GPU) {
#if ENABLE_HIP
        if (data->pSrcGenericDesc) {
            hipError_t err = hipHostFree(data->pSrcGenericDesc);
            if (err != hipSuccess)
                std::cerr << "\n[ERR] hipHostFree failed " << std::to_string(err) << "\n";
        }
        if (data->pDstGenericDesc) {
            hipError_t err = hipHostFree(data->pDstGenericDesc);
            if (err != hipSuccess)
                std::cerr << "\n[ERR] hipHostFree failed " << std::to_string(err) << "\n";
        }
#endif
    } else if (data->deviceType == AGO_TARGET_AFFINITY_CPU) {
        if (data->pSrcGenericDesc) delete data->pSrcGenericDesc;
        if (data->pDstGenericDesc) delete data->pDstGenericDesc;
    }
    if (data) delete data;
    return VX_SUCCESS;
}

//! \brief The kernel target support callback.
// TODO: currently the node is set to the same affinity as the context. This needs to change when we have hybrid modes in the same graph.
static vx_status VX_CALLBACK query_target_support(vx_graph graph, vx_node node,
                                                  vx_bool use_opencl_1_2,              // [input] false: OpenCL driver is 2.0+; true: OpenCL driver is 1.2
                                                  vx_uint32 &supported_target_affinity // [output] must be set to AGO_TARGET_AFFINITY_CPU or AGO_TARGET_AFFINITY_GPU or (AGO_TARGET_AFFINITY_CPU | AGO_TARGET_AFFINITY_GPU)
) {
    vx_context context = vxGetContext((vx_reference)graph);
    AgoTargetAffinityInfo affinity;
    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
        supported_target_affinity = AGO_TARGET_AFFINITY_GPU;
    else
        supported_target_affinity = AGO_TARGET_AFFINITY_CPU;

    return VX_SUCCESS;
}

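//! \brief Registers the Log1p kernel, its callbacks, and its five parameters with the OpenVX context.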
vx_status Log1p_Register(vx_context context) {
    vx_status status = VX_SUCCESS;
    // Add kernel to the context with callbacks
    vx_kernel kernel = vxAddUserKernel(context, "org.rpp.Log1p",
                                       VX_KERNEL_RPP_LOG1P,
                                       processLog1p,
                                       5,
                                       validateLog1p,
                                       initializeLog1p,
                                       uninitializeLog1p);
    ERROR_CHECK_OBJECT(kernel);
    AgoTargetAffinityInfo affinity;
    vxQueryContext(context, VX_CONTEXT_ATTRIBUTE_AMD_AFFINITY, &affinity, sizeof(affinity));
#if ENABLE_HIP
    vx_bool enableBufferAccess = vx_true_e;
    if (affinity.device_type == AGO_TARGET_AFFINITY_GPU)
        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_GPU_BUFFER_ACCESS_ENABLE, &enableBufferAccess, sizeof(enableBufferAccess)));
#else
    vx_bool enableBufferAccess = vx_false_e;
#endif
    amd_kernel_query_target_support_f query_target_support_f = query_target_support;

    if (kernel) {
        STATUS_ERROR_CHECK(vxSetKernelAttribute(kernel, VX_KERNEL_ATTRIBUTE_AMD_QUERY_TARGET_SUPPORT, &query_target_support_f, sizeof(query_target_support_f)));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 0, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 1, VX_INPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 2, VX_OUTPUT, VX_TYPE_TENSOR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 3, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxAddParameterToKernel(kernel, 4, VX_INPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED));
        PARAM_ERROR_CHECK(vxFinalizeKernel(kernel));
    }
    if (status != VX_SUCCESS) {
    exit:
        vxRemoveKernel(kernel);
        return VX_FAILURE;
    }

    return status;
}
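
// Usage sketch (illustrative only, not part of this file): once Log1p_Register() has run for a
// context, a graph can instantiate the kernel through the generic OpenVX node API. The tensor
// and scalar objects below are assumed to be created by the caller beforehand.
//
//   vx_kernel log1pKernel = vxGetKernelByEnum(context, VX_KERNEL_RPP_LOG1P);
//   vx_node log1pNode = vxCreateGenericNode(graph, log1pKernel);
//   vxSetParameterByIndex(log1pNode, 0, (vx_reference)srcTensor);    // input tensor
//   vxSetParameterByIndex(log1pNode, 1, (vx_reference)roiTensor);    // ROI tensor
//   vxSetParameterByIndex(log1pNode, 2, (vx_reference)dstTensor);    // output tensor
//   vxSetParameterByIndex(log1pNode, 3, (vx_reference)layoutScalar); // vx_int32 tensor layout
//   vxSetParameterByIndex(log1pNode, 4, (vx_reference)deviceScalar); // vx_uint32 device type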