diff --git a/include/core/graph_builder.h b/include/core/graph_builder.h index 43b29a3..7b6a80f 100644 --- a/include/core/graph_builder.h +++ b/include/core/graph_builder.h @@ -6,6 +6,7 @@ #include "core/op_type.h" #include "operators/ElementWise.h" #include "operators/Gemm.h" +#include "operators/Clip.h" namespace infini { @@ -25,6 +26,7 @@ class GraphBuilderObj { Tensor add(Tensor A, Tensor B, std::optional Y = std::nullopt); Tensor sub(Tensor A, Tensor B, std::optional Y = std::nullopt); Tensor mul(Tensor A, Tensor B, std::optional Y = std::nullopt); + Tensor clip(Tensor A, Tensor min_val, Tensor max_val, std::optional Y = std::nullopt); string printGraph() const; Graph getGraph() const; diff --git a/include/operators/Clip.h b/include/operators/Clip.h new file mode 100644 index 0000000..62513db --- /dev/null +++ b/include/operators/Clip.h @@ -0,0 +1,28 @@ +#pragma once +#include "core/graph.h" +#include "core/operator.h" + +#include + + +namespace infini { +class ClipObj : public OperatorObj { + public: + /** + * @brief Construct a new Clip object + * + * @param graph The computation graph that this operator belongs to. + * @param input The input tensor. + * @param min_val The minimum value tensor for clipping. + * @param max_val The maximum value tensor for clipping. + * @param output The output tensor. 
+ */ + ClipObj(GraphObj *graph, Tensor input, Tensor min_val, Tensor max_val, Tensor output); + string toString() const override; + ~ClipObj() override; + + void createOpDesc() override; + optional> inferShape() override; + vector inferDataType() const override; +}; +} // namespace infini \ No newline at end of file diff --git a/python/bindings/graph.hpp b/python/bindings/graph.hpp index b952c25..a61a5db 100644 --- a/python/bindings/graph.hpp +++ b/python/bindings/graph.hpp @@ -27,6 +27,8 @@ void bind_graph_builder(py::module &m) { py::arg("Y") = py::none()) .def("mul", &GraphBuilderObj::mul, py::arg("A"), py::arg("B"), py::arg("Y") = py::none()) + .def("clip", &GraphBuilderObj::clip, py::arg("A"), py::arg("min_val"), py::arg("max_val"), + py::arg("Y") = py::none()) .def("to_string", &GraphBuilderObj::printGraph) .def_property_readonly("graph", &GraphBuilderObj::getGraph); } diff --git a/python/src/infinitensor/converter/unified_converters.py b/python/src/infinitensor/converter/unified_converters.py index 40f7842..6265905 100644 --- a/python/src/infinitensor/converter/unified_converters.py +++ b/python/src/infinitensor/converter/unified_converters.py @@ -25,4 +25,11 @@ def convert_add(translator, node): def convert_add(translator, node): a = translator.tensors[node.args[0]] b = translator.tensors[node.args[1]] - translator.tensors[node] = translator.builder.add(a, b, None) \ No newline at end of file + translator.tensors[node] = translator.builder.sub(a, b, None) + +@registry.register("clip","Tensor") +def convert_clip_tensor(translator, node): + a = translator.tensors[node.args[0]] + min_val = translator.tensors[node.args[1]] + max_val = translator.tensors[node.args[2]] + translator.tensors[node] = translator.builder.clip(a, min_val, max_val, None) \ No newline at end of file diff --git a/python/tests/test_torch_fx_translator.py b/python/tests/test_torch_fx_translator.py index 356b826..6519031 100644 --- a/python/tests/test_torch_fx_translator.py +++ 
b/python/tests/test_torch_fx_translator.py @@ -113,6 +113,39 @@ def forward(self, x, y): print("✅ Test passed!") +def test_clip(runtime, torch_rng_seed): + """Use fixtures defined in conftest.py directly""" + print(f"Testing with runtime on device: {runtime}") + print(f"Random seed: {torch_rng_seed}") + + # Create simple model + class ClipModel(torch.nn.Module): + def forward(self, x, min_val, max_val): + return torch.clip(x, min=min_val, max=max_val) + + model = ClipModel() + + # Randomly initialize inputs, passed shapes can differ from actual values, but data types must match + input_info = [((5, 4), "float32"), ((5, 4), "float32"), ((5, 4), "float32")] + input_tensors = [ + torch.as_tensor(np.random.randn(*shape).astype(dtype)) + for shape, dtype in input_info + ] + + # Create translator + translator = TorchFXTranslator(runtime) + translator.import_from_fx(model, input_tensors) + + translator.run(input_tensors) + # Get outputs + outputs = translator.get_outputs() + + # Verify + assert len(outputs) == 1 + assert outputs[0].shape == (5, 4) + print("✅ Test passed!") + + if __name__ == "__main__": # Can run this file directly import sys diff --git a/src/core/graph_builder.cc b/src/core/graph_builder.cc index 3fa1e0f..35756d7 100644 --- a/src/core/graph_builder.cc +++ b/src/core/graph_builder.cc @@ -44,6 +44,17 @@ Tensor GraphBuilderObj::gemm(Tensor A, Tensor B, Tensor C, float alpha, } \ } +Tensor GraphBuilderObj::clip(Tensor A, Tensor min_val, Tensor max_val, std::optional Y) { + if (Y.has_value()) { + g->addOpWithOutputs(std::move(A), std::move(min_val), std::move(max_val), std::move(Y.value())); + return Y.value(); + } else { + return g + ->addOp(std::move(A), std::move(min_val), std::move(max_val), nullptr) + ->getOutput(0); + } +} + DEFINE_BINARY_OP(add, OpType::Add); DEFINE_BINARY_OP(sub, OpType::Sub); DEFINE_BINARY_OP(mul, OpType::Mul); diff --git a/src/kernels/Clip.cc b/src/kernels/Clip.cc new file mode 100644 index 0000000..c59a2ed --- /dev/null +++ 
b/src/kernels/Clip.cc @@ -0,0 +1,27 @@ +#include "core/runtime.h" +#include "operators/Clip.h" + +namespace infini { + +class ClipOp : public Kernel { + void compute(const Operator &_op, + const RuntimeObj *runtime) const override { + auto op = as(_op); + op->createOpDesc(); + void *yData = (op->getOutput(0)->getRawDataPtr()); + void *const aData = (op->getInput(0)->getRawDataPtr()); + void *const min_val = (op->getInput(1)->getRawDataPtr()); + void *const max_val = (op->getInput(2)->getRawDataPtr()); + size_t workspace_size = 0; + CHECK_INFINI_ERROR(infiniopGetClipWorkspaceSize( + (infiniopClipDescriptor_t)op->getInfiniOpDesc(), &workspace_size)); + void *workspace = runtime->getWorkspace(workspace_size); + CHECK_INFINI_ERROR(infiniopClip( + (infiniopClipDescriptor_t)op->getInfiniOpDesc(), workspace, + workspace_size, yData, aData, min_val, max_val, + runtime->getCurrentThreadContext()->stream)); + } +}; +// Registration: bind the Clip operator type to this kernel implementation and add it to the corresponding per-device registry + REGISTER_KERNEL_ALL_DEVICES(OpType::Clip, ClipOp); +} // namespace infini \ No newline at end of file diff --git a/src/operators/Clip.cc b/src/operators/Clip.cc new file mode 100644 index 0000000..f605d00 --- /dev/null +++ b/src/operators/Clip.cc @@ -0,0 +1,92 @@ +#include "operators/Clip.h" +#include "core/runtime.h" + +namespace infini { + +ClipObj::ClipObj(GraphObj *graph, Tensor input, Tensor min_val, Tensor max_val, + Tensor output) + : OperatorObj(OpType::Clip, TensorVec{input, min_val, max_val}, {output}) { + IT_ASSERT(checkValid(graph)); +} + +string ClipObj::toString() const { + std::ostringstream os; + os << "Clip("; + os << "input=" << inputs[0]->getGuid() << ","; + os << "min_val=" << inputs[1]->getGuid() << ","; + os << "max_val=" << inputs[2]->getGuid() << ","; + os << "output=" << outputs[0]->getGuid() << ")"; + return os.str(); +} + +ClipObj::~ClipObj() { + if (infiniOpDesc) { + infiniStatus_t err = INFINI_STATUS_SUCCESS; + err = infiniopDestroyClipDescriptor((infiniopClipDescriptor_t)infiniOpDesc); + if (err 
!= INFINI_STATUS_SUCCESS) { + std::cerr << "Warning: Clip descriptor destroy failed with error code " + << err << std::endl; + } + } +} + +optional> ClipObj::inferShape() { + // Clip does not change the shape of the input tensor + // Simply return the input shape as-is (supports both concrete and symbolic shapes) + auto inputShape = inputs[0]->getShape(); + return {{inputShape}}; +} + +vector ClipObj::inferDataType() const { + return {inputs[0]->getDataType()}; +} + +void ClipObj::createOpDesc() { + auto yShape = outputs[0]->getShape(); + auto yStride = outputs[0]->getStride(); + + auto xShape = inputs[0]->getShape(); + auto xStride = inputs[0]->getStride(); + + auto minValShape = inputs[1]->getShape(); + auto minValStride = inputs[1]->getStride(); + + auto maxValShape = inputs[2]->getShape(); + auto maxValStride = inputs[2]->getStride(); + + infiniopTensorDescriptor_t yTensor, xTensor, minValTensor, maxValTensor; + + CHECK_INFINI_ERROR(infiniopCreateTensorDescriptor( + &yTensor, yShape->size(), yShape->getConstantValue().data(), + yStride->getConstantValue().data(), + outputs[0]->getDataType().getType())); + + CHECK_INFINI_ERROR(infiniopCreateTensorDescriptor( + &xTensor, xShape->size(), xShape->getConstantValue().data(), + xStride->getConstantValue().data(), + inputs[0]->getDataType().getType())); + + CHECK_INFINI_ERROR(infiniopCreateTensorDescriptor( + &minValTensor, minValShape->size(), minValShape->getConstantValue().data(), + minValStride->getConstantValue().data(), + inputs[1]->getDataType().getType())); + + CHECK_INFINI_ERROR(infiniopCreateTensorDescriptor( + &maxValTensor, maxValShape->size(), maxValShape->getConstantValue().data(), + maxValStride->getConstantValue().data(), + inputs[2]->getDataType().getType())); + + infiniopHandle_t handle = nullptr; + CHECK_INFINI_ERROR(infiniopCreateHandle(&handle)); + + CHECK_INFINI_ERROR(infiniopCreateClipDescriptor( + handle, (infiniopClipDescriptor_t *)&infiniOpDesc, yTensor, xTensor, + minValTensor, 
maxValTensor)); + + CHECK_INFINI_ERROR(infiniopDestroyTensorDescriptor(yTensor)); + CHECK_INFINI_ERROR(infiniopDestroyTensorDescriptor(xTensor)); + CHECK_INFINI_ERROR(infiniopDestroyTensorDescriptor(minValTensor)); + CHECK_INFINI_ERROR(infiniopDestroyTensorDescriptor(maxValTensor)); +} + +} // namespace infini \ No newline at end of file diff --git a/test/kernels/test_clip_kernel.cc b/test/kernels/test_clip_kernel.cc new file mode 100644 index 0000000..6f2d79d --- /dev/null +++ b/test/kernels/test_clip_kernel.cc @@ -0,0 +1,442 @@ +#include "core/runtime.h" +#include "operators/Clip.h" +#include "utils/test_utils.h" +#include "gtest/gtest.h" + +namespace infini { + +// Thread test parameters +template struct ClipThreadTestParams { + infiniDevice_t device = INFINI_DEVICE_CPU; + int deviceId = 0; + Shape inputShape; + DataType dataType = DataType(INFINI_DTYPE_F32); + float minVal = 0.0f; + float maxVal = 1.0f; + std::vector inputData; + std::vector outputData; + bool completed = false; + std::string deviceName; +}; + +// Device thread function +template +void clipDeviceThreadFunc(ClipThreadTestParams ¶ms) { + RuntimeObj::init(); + Runtime &runtime = RuntimeObj::getInstance(); + + // Initialize device Context + runtime->initThreadContext(params.device, params.deviceId); + + // Create Graph + Graph g = make_ref(runtime); + auto input = g->addTensor(params.inputShape, params.dataType); + // min_val and max_val must have the same shape as input for InfiniCore Clip + auto min_val = g->addTensor(params.inputShape, params.dataType); + auto max_val = g->addTensor(params.inputShape, params.dataType); + + auto op = g->addOp(input, min_val, max_val, nullptr); + + // Set input data + input->setData(params.inputData.data()); + + // Set min/max values - broadcast to match input shape + size_t numElements = 1; + for (auto dim : params.inputShape) + numElements *= dim; + std::vector minData(numElements, static_cast(params.minVal)); + std::vector maxData(numElements, 
static_cast(params.maxVal)); + min_val->setData(minData.data()); + max_val->setData(maxData.data()); + + runtime->dataMalloc(g); + + // Run computation + runtime->run(g); + + // Get output and copy to host + auto output = op->getOutput(0); + size_t outputNumElements = output->getElement(); + params.outputData.resize(outputNumElements); + + // Check if output data exists + auto dataBlob = output->getData(); + if (!dataBlob) { + throw std::runtime_error("Output data blob is null!"); + } + void *devicePtr = dataBlob->getRawDataPtr(); + if (!devicePtr && !runtime->isCpu()) { + throw std::runtime_error( + "Output device pointer is null on GPU device!"); + } + + // Copy result data + void *hostPtr = runtime->allocHost(output->getTotalBytes()); + runtime->memcpy(hostPtr, devicePtr, output->getTotalBytes(), + INFINIRT_MEMCPY_D2H); + + // Use generic function for data copy and conversion + copyAndConvertData(params.outputData, hostPtr, outputNumElements, + params.dataType); + + runtime->deallocHost(hostPtr); + params.completed = true; +} + +// Data generator function type +template +using ClipDataGeneratorFunc = std::function(size_t, T, T)>; + +// Expected clip result calculation +template +std::vector computeExpectedClip(const std::vector &inputData, + float minVal, float maxVal) { + std::vector expected(inputData.size()); + for (size_t i = 0; i < inputData.size(); ++i) { + float val = static_cast(inputData[i]); + float clipped = std::min(std::max(val, minVal), maxVal); + expected[i] = static_cast(clipped); + } + return expected; +} + +// Run multi-thread test +template +void runClipMultiThreadTest( + const Shape &inputShape, float minVal, float maxVal, + const DataType &dataType, + ClipDataGeneratorFunc dataGenerator = generateRandomData, + bool print = false) { + + // Prepare input data + size_t numElements = 1; + for (auto dim : inputShape) + numElements *= dim; + + // Use the passed data generator function + auto inputData = dataGenerator(numElements, static_cast(-10), 
+ static_cast(10)); + + // Create thread parameters + ClipThreadTestParams cpuParams, gpuParams; + + // CPU thread parameters + cpuParams.device = INFINI_DEVICE_CPU; + cpuParams.deviceId = 0; + cpuParams.inputShape = inputShape; + cpuParams.dataType = dataType; + cpuParams.minVal = minVal; + cpuParams.maxVal = maxVal; + cpuParams.inputData = inputData; + cpuParams.deviceName = "CPU"; + + // GPU thread parameters + gpuParams.device = INFINI_DEVICE_NVIDIA; + gpuParams.deviceId = 0; + gpuParams.inputShape = inputShape; + gpuParams.dataType = dataType; + gpuParams.minVal = minVal; + gpuParams.maxVal = maxVal; + gpuParams.inputData = inputData; + gpuParams.deviceName = "NVIDIA"; + + if (print) { + std::cout << "========================================" << std::endl; + std::cout << "Running Multi-Thread Clip Test" << std::endl; + std::cout << "DataType: " << dataType.toString() << std::endl; + std::cout << "Input Shape: " << vecToString(inputShape) << std::endl; + std::cout << "Min: " << minVal << ", Max: " << maxVal << std::endl; + std::cout << "Thread 1: CPU (" << dataType.toString() << ")" + << std::endl; + std::cout << "Thread 2: NVIDIA (" << dataType.toString() << ")" + << std::endl; + std::cout << "========================================" << std::endl; + } + + // Launch two threads for parallel execution + std::thread cpuThread(clipDeviceThreadFunc, std::ref(cpuParams)); + std::thread gpuThread(clipDeviceThreadFunc, std::ref(gpuParams)); + + // Wait for both threads to complete + cpuThread.join(); + gpuThread.join(); + + // Verify results + ASSERT_TRUE(cpuParams.completed) << "CPU thread failed"; + ASSERT_TRUE(gpuParams.completed) << "NVIDIA thread failed"; + + ASSERT_EQ(cpuParams.outputData.size(), gpuParams.outputData.size()) + << "Output size mismatch"; + + // Compare results + size_t numErrors = 0; + float maxError = 0.0f; + const float epsilon = 1e-2f; + + for (size_t i = 0; i < cpuParams.outputData.size(); ++i) { + float cpuVal, gpuVal; + + // Convert to 
float for comparison + if constexpr (std::is_same_v) { + cpuVal = cpuParams.outputData[i]; + gpuVal = gpuParams.outputData[i]; + } else if constexpr (std::is_same_v) { + // FP16 to FP32 comparison + cpuVal = fp16_to_fp32(cpuParams.outputData[i]); + gpuVal = fp16_to_fp32(gpuParams.outputData[i]); + } + + float error = std::abs(cpuVal - gpuVal); + maxError = std::max(maxError, error); + + if (error > epsilon) { + numErrors++; + if (numErrors <= 5) { // Only print first 5 errors + std::cout << "Mismatch at index " << i << ": CPU=" << cpuVal + << ", NVIDIA=" << gpuVal << ", error=" << error + << std::endl; + } + } + } + + if (print) { + std::cout << "Result Comparison:" << std::endl; + std::cout << " Total elements: " << cpuParams.outputData.size() + << std::endl; + std::cout << " Errors: " << numErrors << std::endl; + std::cout << " Max error: " << maxError << std::endl; + + if (numErrors == 0) { + std::cout << " Test PASSED" << std::endl; + } else { + std::cout << " Test FAILED" << std::endl; + } + std::cout << "========================================" << std::endl; + } + + EXPECT_EQ(numErrors, 0) + << "Results mismatch between CPU and NVIDIA (max error: " << maxError + << ")"; +} + +// Basic Clip operation test - F32 +TEST(Clip, Basic_MultiThread_F32) { + Shape inputShape = {3, 4}; + float minVal = 2.0f; + float maxVal = 7.0f; + +#ifdef USE_CUDA + runClipMultiThreadTest(inputShape, minVal, maxVal, + DataType(INFINI_DTYPE_F32), + generateSequentialData, true); +#else + std::cout << "CUDA not enabled, skipping multi-thread test" << std::endl; +#endif +} + +// Basic Clip operation test - F16 +TEST(Clip, Basic_MultiThread_F16) { + Shape inputShape = {3, 4}; + float minVal = 2.0f; + float maxVal = 7.0f; + +#ifdef USE_CUDA + runClipMultiThreadTest(inputShape, minVal, maxVal, + DataType(INFINI_DTYPE_F16), + generateSequentialData, true); +#else + std::cout << "CUDA not enabled, skipping multi-thread test" << std::endl; +#endif +} + +// Clip with negative min value - F32 
+TEST(Clip, NegativeMin_MultiThread_F32) { + Shape inputShape = {4, 5}; + float minVal = -5.0f; + float maxVal = 5.0f; + +#ifdef USE_CUDA + runClipMultiThreadTest(inputShape, minVal, maxVal, + DataType(INFINI_DTYPE_F32), + generateRandomData); +#endif +} + +// Clip with large values - F32 +TEST(Clip, LargeValues_MultiThread_F32) { + Shape inputShape = {2, 8}; + float minVal = -100.0f; + float maxVal = 100.0f; + +#ifdef USE_CUDA + runClipMultiThreadTest(inputShape, minVal, maxVal, + DataType(INFINI_DTYPE_F32), + generateRandomData); +#endif +} + +// Single device test - CPU +TEST(Clip, SingleDevice_CPU) { + RuntimeObj::init(); + Runtime &runtime = RuntimeObj::getInstance(); + runtime->initThreadContext(INFINI_DEVICE_CPU, 0); + + Shape inputShape = {3, 4}; + float minVal = 2.0f; + float maxVal = 7.0f; + + Graph g = make_ref(runtime); + auto input = g->addTensor(inputShape, DataType(INFINI_DTYPE_F32)); + // min_val and max_val must have the same shape as input for InfiniCore Clip + auto min_val = g->addTensor(inputShape, DataType(INFINI_DTYPE_F32)); + auto max_val = g->addTensor(inputShape, DataType(INFINI_DTYPE_F32)); + + auto op = g->addOp(input, min_val, max_val, nullptr); + + // Set input data + size_t numElements = input->getElement(); + std::vector inputData(numElements); + for (size_t i = 0; i < numElements; ++i) { + // Some values below min, some above max, some in between + inputData[i] = static_cast(i) - 5.0f; + } + + // Set min/max values - broadcast to match input shape + std::vector minData(numElements, minVal); + std::vector maxData(numElements, maxVal); + input->setData(inputData.data()); + min_val->setData(minData.data()); + max_val->setData(maxData.data()); + + runtime->dataMalloc(g); + + // Execute computation + runtime->run(g); + + // Get output and verify + auto output = op->getOutput(0); + std::cout << "Input Data: " << std::endl; + input->printData(runtime); + std::cout << "Clip(" << minVal << ", " << maxVal << ") Output Data: " + << std::endl; + 
output->printData(runtime); + + // Verify expected values + auto expected = computeExpectedClip(inputData, minVal, maxVal); + std::vector outputData(numElements); + + void *hostPtr = runtime->allocHost(output->getTotalBytes()); + auto dataBlob = output->getData(); + runtime->memcpy(hostPtr, dataBlob->getRawDataPtr(), output->getTotalBytes(), + INFINIRT_MEMCPY_D2H); + copyAndConvertData(outputData, hostPtr, numElements, + DataType(INFINI_DTYPE_F32)); + runtime->deallocHost(hostPtr); + + // Check results + size_t errors = 0; + for (size_t i = 0; i < numElements; ++i) { + if (std::abs(outputData[i] - expected[i]) > 1e-5f) { + errors++; + if (errors <= 5) { + std::cout << "Error at index " << i << ": expected=" + << expected[i] << ", got=" << outputData[i] + << std::endl; + } + } + } + + EXPECT_EQ(errors, 0) << "CPU clip computation failed with " << errors + << " errors"; +} + +#ifdef USE_CUDA +// Single device test - NVIDIA F32 +TEST(Clip, SingleDevice_NVIDIA_F32) { + RuntimeObj::init(); + Runtime &runtime = RuntimeObj::getInstance(); + runtime->initThreadContext(INFINI_DEVICE_NVIDIA, 0); + + Shape inputShape = {3, 4}; + float minVal = 2.0f; + float maxVal = 7.0f; + + Graph g = make_ref(runtime); + auto input = g->addTensor(inputShape, DataType(INFINI_DTYPE_F32)); + // min_val and max_val must have the same shape as input for InfiniCore Clip + auto min_val = g->addTensor(inputShape, DataType(INFINI_DTYPE_F32)); + auto max_val = g->addTensor(inputShape, DataType(INFINI_DTYPE_F32)); + + auto op = g->addOp(input, min_val, max_val, nullptr); + + // Set input data + size_t numElements = input->getElement(); + std::vector inputData(numElements); + for (size_t i = 0; i < numElements; ++i) { + inputData[i] = static_cast(i) - 5.0f; + } + + // Set min/max values - broadcast to match input shape + std::vector minData(numElements, minVal); + std::vector maxData(numElements, maxVal); + input->setData(inputData.data()); + min_val->setData(minData.data()); + 
max_val->setData(maxData.data()); + + runtime->dataMalloc(g); + + // Execute computation + runtime->run(g); + + // Get output and print + auto output = op->getOutput(0); + std::cout << "NVIDIA F32 Output Data: " << std::endl; + output->printData(runtime); +} + +// Single device test - NVIDIA F16 +TEST(Clip, SingleDevice_NVIDIA_F16) { + RuntimeObj::init(); + Runtime &runtime = RuntimeObj::getInstance(); + runtime->initThreadContext(INFINI_DEVICE_NVIDIA, 0); + + Shape inputShape = {3, 4}; + float minVal = 2.0f; + float maxVal = 7.0f; + + Graph g = make_ref(runtime); + auto input = g->addTensor(inputShape, DataType(INFINI_DTYPE_F16)); + // min_val and max_val must have the same shape as input for InfiniCore Clip + auto min_val = g->addTensor(inputShape, DataType(INFINI_DTYPE_F16)); + auto max_val = g->addTensor(inputShape, DataType(INFINI_DTYPE_F16)); + + auto op = g->addOp(input, min_val, max_val, nullptr); + + // Set input data + size_t numElements = input->getElement(); + std::vector inputData(numElements); + for (size_t i = 0; i < numElements; ++i) { + // Generate sequential values that will test clipping + inputData[i] = fp32_to_fp16(static_cast(i) - 5.0f); + } + + // Set min/max values - broadcast to match input shape + std::vector minData(numElements, fp32_to_fp16(minVal)); + std::vector maxData(numElements, fp32_to_fp16(maxVal)); + input->setData(inputData.data()); + min_val->setData(minData.data()); + max_val->setData(maxData.data()); + + runtime->dataMalloc(g); + + // Execute computation + runtime->run(g); + + // Get output and print + auto output = op->getOutput(0); + std::cout << "NVIDIA F16 Output Data: " << std::endl; + output->printData(runtime); +} +#endif + +} // namespace infini \ No newline at end of file diff --git a/test/operators/test_clip_op.cc b/test/operators/test_clip_op.cc new file mode 100644 index 0000000..dbd6d8e --- /dev/null +++ b/test/operators/test_clip_op.cc @@ -0,0 +1,151 @@ +#include "core/runtime.h" +#include "operators/Clip.h" 
+#include "gtest/gtest.h" + +namespace infini { + +class ClipBasicTest : public testing::Test { + protected: + Runtime runtime; + Graph graph; + + void SetUp() override { + runtime = make_ref(); + graph = make_ref(runtime); + } +}; + +// Test basic construction of Clip +TEST_F(ClipBasicTest, BasicConstruction) { + auto input = graph->addTensor({2, 3, 4}, DataType(INFINI_DTYPE_F32)); + auto min_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + auto max_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + + auto clip = graph->addOp(input, min_val, max_val, nullptr); + EXPECT_EQ(clip->getOpType(), OpType::Clip); + EXPECT_EQ(clip->getNumInputs(), 3); + EXPECT_EQ(clip->getNumOutputs(), 1); +} + +// Test Clip shape inference - same shape as input +TEST_F(ClipBasicTest, ShapeInferenceSameShape) { + auto input = graph->addTensor({2, 3, 4}, DataType(INFINI_DTYPE_F32)); + auto min_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + auto max_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + + auto clip = graph->addOp(input, min_val, max_val, nullptr); + + auto inferredShapes = clip->inferShape(); + ASSERT_TRUE(inferredShapes.has_value()); + ASSERT_EQ(inferredShapes->size(), 1); + + auto outputShape = (*inferredShapes)[0]; + EXPECT_TRUE(outputShape->isConcrete()); + + auto shapeValues = outputShape->getConstantValue(); + EXPECT_EQ(shapeValues.size(), 3); + EXPECT_EQ(shapeValues[0], 2); + EXPECT_EQ(shapeValues[1], 3); + EXPECT_EQ(shapeValues[2], 4); +} + +// Test Clip shape inference - 1D tensor +TEST_F(ClipBasicTest, ShapeInference1D) { + auto input = graph->addTensor({100}, DataType(INFINI_DTYPE_F32)); + auto min_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + auto max_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + + auto clip = graph->addOp(input, min_val, max_val, nullptr); + + auto inferredShapes = clip->inferShape(); + ASSERT_TRUE(inferredShapes.has_value()); + + auto outputShape = 
(*inferredShapes)[0]; + auto shapeValues = outputShape->getConstantValue(); + EXPECT_EQ(shapeValues.size(), 1); + EXPECT_EQ(shapeValues[0], 100); +} + +// Test Clip shape inference - 4D tensor (common deep learning shape) +TEST_F(ClipBasicTest, ShapeInference4D) { + auto input = graph->addTensor({2, 3, 4, 5}, DataType(INFINI_DTYPE_F32)); + auto min_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + auto max_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + + auto clip = graph->addOp(input, min_val, max_val, nullptr); + + auto inferredShapes = clip->inferShape(); + ASSERT_TRUE(inferredShapes.has_value()); + + auto outputShape = (*inferredShapes)[0]; + auto shapeValues = outputShape->getConstantValue(); + EXPECT_EQ(shapeValues.size(), 4); + EXPECT_EQ(shapeValues[0], 2); + EXPECT_EQ(shapeValues[1], 3); + EXPECT_EQ(shapeValues[2], 4); + EXPECT_EQ(shapeValues[3], 5); +} + +// Test Clip data type inference - Float32 +TEST_F(ClipBasicTest, DataTypeInferenceFloat32) { + auto input = graph->addTensor({2, 3}, DataType(INFINI_DTYPE_F32)); + auto min_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + auto max_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + + auto clip = graph->addOp(input, min_val, max_val, nullptr); + + auto inferredTypes = clip->inferDataType(); + ASSERT_EQ(inferredTypes.size(), 1); + EXPECT_EQ(inferredTypes[0], DataType(INFINI_DTYPE_F32)); +} + +// Test Clip data type inference - Float64 +TEST_F(ClipBasicTest, DataTypeInferenceFloat64) { + auto input = graph->addTensor({2, 3}, DataType(INFINI_DTYPE_F64)); + auto min_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F64)); + auto max_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F64)); + + auto clip = graph->addOp(input, min_val, max_val, nullptr); + + auto inferredTypes = clip->inferDataType(); + ASSERT_EQ(inferredTypes.size(), 1); + EXPECT_EQ(inferredTypes[0], DataType(INFINI_DTYPE_F64)); +} + +// Test Clip data type inference - Float16 
+TEST_F(ClipBasicTest, DataTypeInferenceFloat16) { + auto input = graph->addTensor({2, 3}, DataType(INFINI_DTYPE_F16)); + auto min_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F16)); + auto max_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F16)); + + auto clip = graph->addOp(input, min_val, max_val, nullptr); + + auto inferredTypes = clip->inferDataType(); + ASSERT_EQ(inferredTypes.size(), 1); + EXPECT_EQ(inferredTypes[0], DataType(INFINI_DTYPE_F16)); +} + +// Test symbolic shape inference +TEST_F(ClipBasicTest, SymbolicShapeInference) { + auto batch = ExprObj::variable("batch"); + auto height = ExprObj::variable("h"); + auto width = ExprObj::constant(256); + + auto shapeInput = ShapeExpr(new ShapeExprObj({batch, height, width})); + + auto input = graph->addTensor(shapeInput, DataType(INFINI_DTYPE_F32)); + auto min_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + auto max_val = graph->addTensor(Shape{}, DataType(INFINI_DTYPE_F32)); + + auto clip = graph->addOp(input, min_val, max_val, nullptr); + + auto inferredShapes = clip->inferShape(); + ASSERT_TRUE(inferredShapes.has_value()); + + auto outputShape = (*inferredShapes)[0]; + EXPECT_FALSE(outputShape->isConcrete()); + EXPECT_EQ(outputShape->size(), 3); + EXPECT_EQ(outputShape->toString(), "[batch, h, 256]"); +} + +} // namespace infini \ No newline at end of file